From a45226893de839b97a7d6ba1210f1a313a2bc491 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 3 Jan 2023 12:47:36 +0100 Subject: [PATCH 01/26] feat: add video url and tensors to proto Signed-off-by: anna-charlotte --- docarray/proto/docarray.proto | 6 ++++++ docarray/proto/pb2/docarray_pb2.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/docarray/proto/docarray.proto b/docarray/proto/docarray.proto index 0646453294e..39f8354b223 100644 --- a/docarray/proto/docarray.proto +++ b/docarray/proto/docarray.proto @@ -69,6 +69,12 @@ message NodeProto { NdArrayProto audio_torch_tensor = 16; + string video_url = 17; + + NdArrayProto video_ndarray = 18; + + NdArrayProto video_torch_tensor = 19; + } } diff --git a/docarray/proto/pb2/docarray_pb2.py b/docarray/proto/pb2/docarray_pb2.py index 1d5fb2d954b..da5d3df5a46 100644 --- a/docarray/proto/pb2/docarray_pb2.py +++ b/docarray/proto/pb2/docarray_pb2.py @@ -15,7 +15,7 @@ from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x0e\x64ocarray.proto\x12\x08\x64ocarray\x1a\x1cgoogle/protobuf/struct.proto\"A\n\x11\x44\x65nseNdArrayProto\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05shape\x18\x02 \x03(\r\x12\r\n\x05\x64type\x18\x03 \x01(\t\"g\n\x0cNdArrayProto\x12*\n\x05\x64\x65nse\x18\x01 \x01(\x0b\x32\x1b.docarray.DenseNdArrayProto\x12+\n\nparameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\x8e\x04\n\tNodeProto\x12\x0e\n\x04\x62lob\x18\x01 \x01(\x0cH\x00\x12)\n\x07ndarray\x18\x02 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x0e\n\x04text\x18\x03 \x01(\tH\x00\x12)\n\x06nested\x18\x04 \x01(\x0b\x32\x17.docarray.DocumentProtoH\x00\x12.\n\x06\x63hunks\x18\x05 \x01(\x0b\x32\x1c.docarray.DocumentArrayProtoH\x00\x12+\n\tembedding\x18\x06 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x11\n\x07\x61ny_url\x18\x07 \x01(\tH\x00\x12\x13\n\timage_url\x18\x08 
\x01(\tH\x00\x12\x12\n\x08text_url\x18\t \x01(\tH\x00\x12\x0c\n\x02id\x18\n \x01(\tH\x00\x12.\n\x0ctorch_tensor\x18\x0b \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x12\n\x08mesh_url\x18\x0c \x01(\tH\x00\x12\x19\n\x0fpoint_cloud_url\x18\r \x01(\tH\x00\x12\x13\n\taudio_url\x18\x0e \x01(\tH\x00\x12/\n\raudio_ndarray\x18\x0f \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x34\n\x12\x61udio_torch_tensor\x18\x10 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x42\t\n\x07\x63ontent\"\x82\x01\n\rDocumentProto\x12/\n\x04\x64\x61ta\x18\x01 \x03(\x0b\x32!.docarray.DocumentProto.DataEntry\x1a@\n\tDataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\"\n\x05value\x18\x02 \x01(\x0b\x32\x13.docarray.NodeProto:\x02\x38\x01\";\n\x12\x44ocumentArrayProto\x12%\n\x04\x64ocs\x18\x01 \x03(\x0b\x32\x17.docarray.DocumentProto\"\x86\x01\n\x0fUnionArrayProto\x12=\n\x0e\x64ocument_array\x18\x01 \x01(\x0b\x32#.docarray.DocumentArrayStackedProtoH\x00\x12)\n\x07ndarray\x18\x02 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x42\t\n\x07\x63ontent\"\xd6\x01\n\x19\x44ocumentArrayStackedProto\x12+\n\x05list_\x18\x01 \x01(\x0b\x32\x1c.docarray.DocumentArrayProto\x12\x41\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x30.docarray.DocumentArrayStackedProto.ColumnsEntry\x1aI\n\x0c\x43olumnsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.docarray.UnionArrayProto:\x02\x38\x01\x62\x06proto3' + b'\n\x0e\x64ocarray.proto\x12\x08\x64ocarray\x1a\x1cgoogle/protobuf/struct.proto\"A\n\x11\x44\x65nseNdArrayProto\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05shape\x18\x02 \x03(\r\x12\r\n\x05\x64type\x18\x03 \x01(\t\"g\n\x0cNdArrayProto\x12*\n\x05\x64\x65nse\x18\x01 \x01(\x0b\x32\x1b.docarray.DenseNdArrayProto\x12+\n\nparameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\x8a\x05\n\tNodeProto\x12\x0e\n\x04\x62lob\x18\x01 \x01(\x0cH\x00\x12)\n\x07ndarray\x18\x02 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x0e\n\x04text\x18\x03 \x01(\tH\x00\x12)\n\x06nested\x18\x04 
\x01(\x0b\x32\x17.docarray.DocumentProtoH\x00\x12.\n\x06\x63hunks\x18\x05 \x01(\x0b\x32\x1c.docarray.DocumentArrayProtoH\x00\x12+\n\tembedding\x18\x06 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x11\n\x07\x61ny_url\x18\x07 \x01(\tH\x00\x12\x13\n\timage_url\x18\x08 \x01(\tH\x00\x12\x12\n\x08text_url\x18\t \x01(\tH\x00\x12\x0c\n\x02id\x18\n \x01(\tH\x00\x12.\n\x0ctorch_tensor\x18\x0b \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x12\n\x08mesh_url\x18\x0c \x01(\tH\x00\x12\x19\n\x0fpoint_cloud_url\x18\r \x01(\tH\x00\x12\x13\n\taudio_url\x18\x0e \x01(\tH\x00\x12/\n\raudio_ndarray\x18\x0f \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x34\n\x12\x61udio_torch_tensor\x18\x10 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x13\n\tvideo_url\x18\x11 \x01(\tH\x00\x12/\n\rvideo_ndarray\x18\x12 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x12\x34\n\x12video_torch_tensor\x18\x13 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x42\t\n\x07\x63ontent\"\x82\x01\n\rDocumentProto\x12/\n\x04\x64\x61ta\x18\x01 \x03(\x0b\x32!.docarray.DocumentProto.DataEntry\x1a@\n\tDataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\"\n\x05value\x18\x02 \x01(\x0b\x32\x13.docarray.NodeProto:\x02\x38\x01\";\n\x12\x44ocumentArrayProto\x12%\n\x04\x64ocs\x18\x01 \x03(\x0b\x32\x17.docarray.DocumentProto\"\x86\x01\n\x0fUnionArrayProto\x12=\n\x0e\x64ocument_array\x18\x01 \x01(\x0b\x32#.docarray.DocumentArrayStackedProtoH\x00\x12)\n\x07ndarray\x18\x02 \x01(\x0b\x32\x16.docarray.NdArrayProtoH\x00\x42\t\n\x07\x63ontent\"\xd6\x01\n\x19\x44ocumentArrayStackedProto\x12+\n\x05list_\x18\x01 \x01(\x0b\x32\x1c.docarray.DocumentArrayProto\x12\x41\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x30.docarray.DocumentArrayStackedProto.ColumnsEntry\x1aI\n\x0c\x43olumnsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.docarray.UnionArrayProto:\x02\x38\x01\x62\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -32,17 +32,17 @@ _NDARRAYPROTO._serialized_start = 125 
_NDARRAYPROTO._serialized_end = 228 _NODEPROTO._serialized_start = 231 - _NODEPROTO._serialized_end = 757 - _DOCUMENTPROTO._serialized_start = 760 - _DOCUMENTPROTO._serialized_end = 890 - _DOCUMENTPROTO_DATAENTRY._serialized_start = 826 - _DOCUMENTPROTO_DATAENTRY._serialized_end = 890 - _DOCUMENTARRAYPROTO._serialized_start = 892 - _DOCUMENTARRAYPROTO._serialized_end = 951 - _UNIONARRAYPROTO._serialized_start = 954 - _UNIONARRAYPROTO._serialized_end = 1088 - _DOCUMENTARRAYSTACKEDPROTO._serialized_start = 1091 - _DOCUMENTARRAYSTACKEDPROTO._serialized_end = 1305 - _DOCUMENTARRAYSTACKEDPROTO_COLUMNSENTRY._serialized_start = 1232 - _DOCUMENTARRAYSTACKEDPROTO_COLUMNSENTRY._serialized_end = 1305 + _NODEPROTO._serialized_end = 881 + _DOCUMENTPROTO._serialized_start = 884 + _DOCUMENTPROTO._serialized_end = 1014 + _DOCUMENTPROTO_DATAENTRY._serialized_start = 950 + _DOCUMENTPROTO_DATAENTRY._serialized_end = 1014 + _DOCUMENTARRAYPROTO._serialized_start = 1016 + _DOCUMENTARRAYPROTO._serialized_end = 1075 + _UNIONARRAYPROTO._serialized_start = 1078 + _UNIONARRAYPROTO._serialized_end = 1212 + _DOCUMENTARRAYSTACKEDPROTO._serialized_start = 1215 + _DOCUMENTARRAYSTACKEDPROTO._serialized_end = 1429 + _DOCUMENTARRAYSTACKEDPROTO_COLUMNSENTRY._serialized_start = 1356 + _DOCUMENTARRAYSTACKEDPROTO_COLUMNSENTRY._serialized_end = 1429 # @@protoc_insertion_point(module_scope) From 3ccb697212346ba4dab387e06292c6905151b8f1 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 3 Jan 2023 13:12:28 +0100 Subject: [PATCH 02/26] feat: add video url and video ndarray Signed-off-by: anna-charlotte --- docarray/typing/__init__.py | 11 +- docarray/typing/tensor/video/video_ndarray.py | 59 ++++++++++ docarray/typing/url/__init__.py | 11 +- docarray/typing/url/video_url.py | 92 +++++++++++++++ tests/units/typing/url/test_video_url.py | 108 ++++++++++++++++++ 5 files changed, 278 insertions(+), 3 deletions(-) create mode 100644 docarray/typing/tensor/video/video_ndarray.py create mode 100644 
docarray/typing/url/video_url.py create mode 100644 tests/units/typing/url/test_video_url.py diff --git a/docarray/typing/__init__.py b/docarray/typing/__init__.py index 4cedcc689fc..1f16a4ec3ee 100644 --- a/docarray/typing/__init__.py +++ b/docarray/typing/__init__.py @@ -3,6 +3,7 @@ from docarray.typing.tensor.embedding.embedding import Embedding from docarray.typing.tensor.ndarray import NdArray from docarray.typing.tensor.tensor import AnyTensor +from docarray.typing.tensor.video import VideoNdArray from docarray.typing.url import ( AnyUrl, AudioUrl, @@ -10,17 +11,20 @@ Mesh3DUrl, PointCloud3DUrl, TextUrl, + VideoUrl, ) __all__ = [ - 'AudioNdArray', 'NdArray', + 'AudioNdArray', + 'VideoNdArray', 'Embedding', 'ImageUrl', 'AudioUrl', 'TextUrl', 'Mesh3DUrl', 'PointCloud3DUrl', + 'VideoUrl', 'AnyUrl', 'ID', 'AnyTensor', @@ -33,5 +37,8 @@ else: from docarray.typing.tensor import TorchEmbedding, TorchTensor # noqa: F401 from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor # noqa + from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor # noqa - __all__.extend(['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor']) + __all__.extend( + ['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor', 'VideoTorchTensor'] + ) diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py new file mode 100644 index 00000000000..f44b332770b --- /dev/null +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -0,0 +1,59 @@ +from typing import TypeVar + +import numpy as np + +from docarray.typing.tensor.ndarray import NdArray +from docarray.typing.tensor.video.abstract_video_tensor import AbstractVideoTensor + +T = TypeVar('T', bound='VideoNdArray') + + +class VideoNdArray(AbstractVideoTensor, NdArray): + """ + Subclass of NdArray, to represent a video tensor. + + Additionally, this allows storing such a tensor as a .wav audio file. + + EXAMPLE USAGE + + .. 
code-block:: python + + from typing import Optional + from pydantic import parse_obj_as + from docarray import Document + from docarray.typing import AudioNdArray, AudioUrl + import numpy as np + + + class MyAudioDoc(Document): + title: str + audio_tensor: Optional[AudioNdArray] + url: Optional[AudioUrl] + + + # from tensor + doc_1 = MyAudioDoc( + title='my_first_audio_doc', + audio_tensor=np.random.rand(1000, 2), + ) + doc_1.audio_tensor.save_to_wav_file(file_path='path/to/file_1.wav') + # from url + doc_2 = MyAudioDoc( + title='my_second_audio_doc', + url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav', + ) + doc_2.audio_tensor = parse_obj_as(AudioNdArray, doc_2.url.load()) + doc_2.audio_tensor.save_to_wav_file(file_path='path/to/file_2.wav') + """ + + _PROTO_FIELD_NAME = 'video_ndarray' + + def check_shape(self) -> None: + if self.ndim != 4 or self.shape[-1] != 3 or self.dtype != np.uint8: + raise ValueError( + f'expects `` with dtype=uint8 and ndim=4 and the last dimension is 3, ' + f'but receiving {self.shape} in {self.dtype}' + ) + + def to_numpy(self) -> np.ndarray: + return self diff --git a/docarray/typing/url/__init__.py b/docarray/typing/url/__init__.py index 29efa353c16..b1a4416744d 100644 --- a/docarray/typing/url/__init__.py +++ b/docarray/typing/url/__init__.py @@ -4,5 +4,14 @@ from docarray.typing.url.text_url import TextUrl from docarray.typing.url.url_3d.mesh_url import Mesh3DUrl from docarray.typing.url.url_3d.point_cloud_url import PointCloud3DUrl +from docarray.typing.url.video_url import VideoUrl -__all__ = ['ImageUrl', 'AudioUrl', 'AnyUrl', 'TextUrl', 'Mesh3DUrl', 'PointCloud3DUrl'] +__all__ = [ + 'ImageUrl', + 'AudioUrl', + 'AnyUrl', + 'TextUrl', + 'Mesh3DUrl', + 'PointCloud3DUrl', + 'VideoUrl', +] diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py new file mode 100644 index 00000000000..bcfdec16191 --- /dev/null +++ b/docarray/typing/url/video_url.py @@ -0,0 +1,92 @@ +from 
typing import TYPE_CHECKING, Any, Tuple, Type, TypeVar, Union + +import numpy as np +from pydantic.tools import parse_obj_as + +from docarray.typing.tensor.video import VideoNdArray +from docarray.typing.url.any_url import AnyUrl + +if TYPE_CHECKING: + from pydantic import BaseConfig + from pydantic.fields import ModelField + + from docarray.proto import NodeProto + +T = TypeVar('T', bound='VideoUrl') + +VIDEO_FILE_FORMATS = ['mp4'] + + +class VideoUrl(AnyUrl): + """ + URL to a .wav file. + Can be remote (web) URL, or a local file path. + """ + + def _to_node_protobuf(self: T) -> 'NodeProto': + """Convert Document into a NodeProto protobuf message. This function should + be called when the Document is nested into another Document that needs to + be converted into a protobuf + :return: the nested item protobuf message + """ + from docarray.proto import NodeProto + + return NodeProto(video_url=str(self)) + + @classmethod + def validate( + cls: Type[T], + value: Union[T, np.ndarray, Any], + field: 'ModelField', + config: 'BaseConfig', + ) -> T: + url = super().validate(value, field, config) + has_video_extension = any(ext in url for ext in VIDEO_FILE_FORMATS) + if not has_video_extension: + raise ValueError( + f'Video URL must have one of the following extensions:' + f'{VIDEO_FILE_FORMATS}' + ) + return cls(str(url), scheme=None) + + def load( + self: T, only_keyframes: bool = False, **kwargs + ) -> Union[VideoNdArray, Tuple[VideoNdArray, VideoNdArray]]: + """ + Load the data from the url into a numpy.ndarray. + + + + :param only_keyframes: if True keep only the keyframes, if False keep all frames + and store the indices of the keyframes in :attr:`.tags` + :param kwargs: supports all keyword arguments that are being supported by + av.open() as described in: + https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open + :return: np.ndarray representing the audio file content, list of key frame + indices if only_keyframe False. 
+ """ + import av + + with av.open(self, **kwargs) as container: + if only_keyframes: + stream = container.streams.video[0] + stream.codec_context.skip_frame = 'NONKEY' + + frames = [] + keyframe_indices = [] + for i, frame in enumerate(container.decode(video=0)): + + img = frame.to_image() + frames.append(img) + if not only_keyframes and frame.key_frame == 1: + keyframe_indices.append(i) + + frames = parse_obj_as(VideoNdArray, np.moveaxis(np.stack(frames), 1, 2)) + + if only_keyframes: + return frames + else: + indices = parse_obj_as( + VideoNdArray, np.ndarray(keyframe_indices, dtype=np.int32) + ) + return frames, indices diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py new file mode 100644 index 00000000000..39ad487e8fc --- /dev/null +++ b/tests/units/typing/url/test_video_url.py @@ -0,0 +1,108 @@ +from typing import Optional + +import numpy as np +import pytest +from pydantic.tools import parse_obj_as, schema_json_of + +from docarray import BaseDocument +from docarray.document.io.json import orjson_dumps +from docarray.typing import VideoNdArray, VideoTorchTensor, VideoUrl +from tests import TOYDATA_DIR + +LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') +REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize( + 'file_url', + [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], +) +def test_load_with_only_keyframes_false(file_url): + url = parse_obj_as(VideoUrl, file_url) + tensor, indices = url.load(only_keyframes=False) + + assert isinstance(tensor, np.ndarray) + assert isinstance(tensor, VideoNdArray) + + assert isinstance(indices, np.ndarray) + assert isinstance(indices, VideoNdArray) + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize( + 'file_url', + [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], +) +def test_load_with_only_keyframes_true(file_url): + url = 
parse_obj_as(VideoUrl, file_url) + tensor = url.load(only_keyframes=True) + + assert isinstance(tensor, np.ndarray) + assert isinstance(tensor, VideoNdArray) + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize( + 'file_url', + [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], +) +def test_load_video_url_to_video_torch_tensor_field(file_url): + class MyVideoDoc(BaseDocument): + video_url: VideoUrl + tensor: Optional[VideoTorchTensor] + + doc = MyVideoDoc(video_url=file_url) + doc.tensor = doc.video_url.load(only_keyframes=True) + + assert isinstance(doc.tensor, np.ndarray) + assert isinstance(doc.tensor, VideoNdArray) + + +def test_json_schema(): + schema_json_of(VideoUrl) + + +def test_dump_json(): + url = parse_obj_as(VideoUrl, REMOTE_VIDEO_FILE) + orjson_dumps(url) + + +@pytest.mark.parametrize( + 'path_to_file', + [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], +) +def test_validation(path_to_file): + url = parse_obj_as(VideoUrl, path_to_file) + assert isinstance(url, VideoUrl) + assert isinstance(url, str) + + +@pytest.mark.parametrize( + 'path_to_file', + [ + 'illegal', + 'https://www.google.com', + 'my/local/text/file.txt', + 'my/local/text/file.png', + 'my/local/file.mp3', + ], +) +def test_illegal_validation(path_to_file): + with pytest.raises(ValueError, match='VideoUrl'): + parse_obj_as(VideoUrl, path_to_file) + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize( + 'file_url', + [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], +) +def test_proto_video_url(file_url): + uri = parse_obj_as(VideoUrl, file_url) + proto = uri._to_node_protobuf() + assert str(proto).startswith('video_url') From dc957d19bc20bf7072f0dda8591b5de1d668406e Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 4 Jan 2023 10:27:31 +0100 Subject: [PATCH 03/26] feat: add video torch tensor and tests Signed-off-by: anna-charlotte --- docarray/__init__.py | 3 +- docarray/predefined_document/__init__.py | 3 +- docarray/predefined_document/video.py | 31 +++++++ 
docarray/typing/tensor/video/__init__.py | 12 +++ .../tensor/video/abstract_video_tensor.py | 65 ++++++++++++++ docarray/typing/tensor/video/video_ndarray.py | 54 ++++-------- docarray/typing/tensor/video/video_tensor.py | 13 +++ .../typing/tensor/video/video_torch_tensor.py | 43 +++++++++ docarray/typing/url/video_url.py | 13 ++- .../predefined_document/test_video.py | 43 +++++++++ .../units/typing/tensor/test_video_tensor.py | 87 +++++++++++++++++++ 11 files changed, 323 insertions(+), 44 deletions(-) create mode 100644 docarray/predefined_document/video.py create mode 100644 docarray/typing/tensor/video/__init__.py create mode 100644 docarray/typing/tensor/video/abstract_video_tensor.py create mode 100644 docarray/typing/tensor/video/video_tensor.py create mode 100644 docarray/typing/tensor/video/video_torch_tensor.py create mode 100644 tests/integrations/predefined_document/test_video.py create mode 100644 tests/units/typing/tensor/test_video_tensor.py diff --git a/docarray/__init__.py b/docarray/__init__.py index f5a2e8f7893..f54c3ad460d 100644 --- a/docarray/__init__.py +++ b/docarray/__init__.py @@ -2,7 +2,7 @@ from docarray.array.array import DocumentArray from docarray.document.document import BaseDocument -from docarray.predefined_document import Audio, Image, Mesh3D, PointCloud3D, Text +from docarray.predefined_document import Audio, Image, Mesh3D, PointCloud3D, Text, Video __all__ = [ 'BaseDocument', @@ -12,4 +12,5 @@ 'Text', 'Mesh3D', 'PointCloud3D', + 'Video', ] diff --git a/docarray/predefined_document/__init__.py b/docarray/predefined_document/__init__.py index cf67088fc2c..6dcec5276a6 100644 --- a/docarray/predefined_document/__init__.py +++ b/docarray/predefined_document/__init__.py @@ -3,5 +3,6 @@ from docarray.predefined_document.mesh import Mesh3D from docarray.predefined_document.point_cloud import PointCloud3D from docarray.predefined_document.text import Text +from docarray.predefined_document.video import Video -__all__ = ['Text', 'Image', 
'Audio', 'Mesh3D', 'PointCloud3D'] +__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D', 'Video'] diff --git a/docarray/predefined_document/video.py b/docarray/predefined_document/video.py new file mode 100644 index 00000000000..0536a543d1b --- /dev/null +++ b/docarray/predefined_document/video.py @@ -0,0 +1,31 @@ +from typing import Optional, TypeVar + +from docarray.document import BaseDocument +from docarray.typing import AnyTensor, Embedding +from docarray.typing.tensor.video.video_tensor import VideoTensor +from docarray.typing.url.video_url import VideoUrl + +T = TypeVar('T', bound='Video') + + +class Video(BaseDocument): + """ + Document for handling video. + The Video Document can contain a VideoUrl (`Video.url`), a VideoTensor + (`Video.tensor`), an AnyTensor ('Video.key_frame_indices), and an Embedding + (`Video.embedding`). + + EXAMPLE USAGE: + + You can use this Document directly: + + You can extend this Document: + + You can use this Document for composition: + + """ + + url: Optional[VideoUrl] + tensor: Optional[VideoTensor] + key_frame_indices: Optional[AnyTensor] + embedding: Optional[Embedding] diff --git a/docarray/typing/tensor/video/__init__.py b/docarray/typing/tensor/video/__init__.py new file mode 100644 index 00000000000..b2fb90cd1e5 --- /dev/null +++ b/docarray/typing/tensor/video/__init__.py @@ -0,0 +1,12 @@ +from docarray.typing.tensor.video.video_ndarray import VideoNdArray + +__all__ = ['VideoNdArray'] + +try: + import torch # noqa: F401 +except ImportError: + pass +else: + from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor # noqa + + __all__.extend(['VideoTorchTensor']) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py new file mode 100644 index 00000000000..f9134037710 --- /dev/null +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -0,0 +1,65 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, 
Dict, Generator, Optional, Tuple, Type, TypeVar, Union + +import numpy as np + +from docarray.typing.tensor.abstract_tensor import AbstractTensor + +T = TypeVar('T', bound='AbstractVideoTensor') + + +class AbstractVideoTensor(AbstractTensor, ABC): + @abstractmethod + def to_numpy(self) -> np.ndarray: + """ + Convert video tensor to numpy.ndarray. + """ + ... + + def save_to_file( + self: 'T', + file_path: Union[str, BinaryIO], + frame_rate: int = 30, + codec: str = 'h264', + ) -> None: + """ + Save video tensor to a .wav file. Mono/stereo is preserved. + + + :param file_path: path to a .wav file. If file is a string, open the file by + that name, otherwise treat it as a file-like object. + :param frame_rate: frames per second. + :param codec: the name of a decoder/encoder. + """ + np_tensor = self.to_numpy() + + video_tensor = np.moveaxis(np.clip(np_tensor, 0, 255), 1, 2).astype('uint8') + + import av + + with av.open(file_path, mode='w') as container: + stream = container.add_stream(codec, rate=frame_rate) + stream.width = np_tensor.shape[1] + stream.height = np_tensor.shape[2] + stream.pix_fmt = 'yuv420p' + + for b in video_tensor: + frame = av.VideoFrame.from_ndarray(b, format='rgb24') + for packet in stream.encode(frame): + container.mux(packet) + + for packet in stream.encode(): + container.mux(packet) + + @classmethod + def generator_from_webcam( + cls: Type['T'], + height_width: Optional[Tuple[int, int]] = None, + show_window: bool = True, + window_title: str = 'webcam', + fps: int = 30, + exit_key: int = 27, + exit_event=None, + tags: Optional[Dict] = None, + ) -> Generator['T', None, None]: + ... 
diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index f44b332770b..5362bb05dc1 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -1,4 +1,4 @@ -from typing import TypeVar +from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union import numpy as np @@ -7,53 +7,37 @@ T = TypeVar('T', bound='VideoNdArray') +if TYPE_CHECKING: + from pydantic import BaseConfig + from pydantic.fields import ModelField + class VideoNdArray(AbstractVideoTensor, NdArray): """ Subclass of NdArray, to represent a video tensor. - - Additionally, this allows storing such a tensor as a .wav audio file. + Adds video-specific features to the tensor. EXAMPLE USAGE - .. code-block:: python - - from typing import Optional - from pydantic import parse_obj_as - from docarray import Document - from docarray.typing import AudioNdArray, AudioUrl - import numpy as np - - - class MyAudioDoc(Document): - title: str - audio_tensor: Optional[AudioNdArray] - url: Optional[AudioUrl] - - - # from tensor - doc_1 = MyAudioDoc( - title='my_first_audio_doc', - audio_tensor=np.random.rand(1000, 2), - ) - doc_1.audio_tensor.save_to_wav_file(file_path='path/to/file_1.wav') - # from url - doc_2 = MyAudioDoc( - title='my_second_audio_doc', - url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav', - ) - doc_2.audio_tensor = parse_obj_as(AudioNdArray, doc_2.url.load()) - doc_2.audio_tensor.save_to_wav_file(file_path='path/to/file_2.wav') """ _PROTO_FIELD_NAME = 'video_ndarray' - def check_shape(self) -> None: - if self.ndim != 4 or self.shape[-1] != 3 or self.dtype != np.uint8: + @classmethod + def validate( + cls: Type[T], + value: Union[T, np.ndarray, List[Any], Tuple[Any], Any], + field: 'ModelField', + config: 'BaseConfig', + ) -> T: + array = super().validate(value=value, field=field, config=config) + if array.ndim not in [3, 4] or array.shape[-1] 
!= 3: raise ValueError( - f'expects `` with dtype=uint8 and ndim=4 and the last dimension is 3, ' - f'but receiving {self.shape} in {self.dtype}' + f'Expects tensor with 3 or 4 dimensions and the last dimension equal' + f' to 3, but received {array.shape} in {array.dtype}' ) + else: + return array def to_numpy(self) -> np.ndarray: return self diff --git a/docarray/typing/tensor/video/video_tensor.py b/docarray/typing/tensor/video/video_tensor.py new file mode 100644 index 00000000000..ddf8cad3ee6 --- /dev/null +++ b/docarray/typing/tensor/video/video_tensor.py @@ -0,0 +1,13 @@ +from typing import Union + +from docarray.typing.tensor.video.video_ndarray import VideoNdArray + +try: + import torch # noqa: F401 +except ImportError: + VideoTensor = VideoNdArray + +else: + from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor + + VideoTensor = Union[VideoNdArray, VideoTorchTensor] # type: ignore diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py new file mode 100644 index 00000000000..0bc755f8467 --- /dev/null +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -0,0 +1,43 @@ +from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union + +import numpy as np + +from docarray.typing.tensor.torch_tensor import TorchTensor, metaTorchAndNode +from docarray.typing.tensor.video.abstract_video_tensor import AbstractVideoTensor + +T = TypeVar('T', bound='VideoTorchTensor') + +if TYPE_CHECKING: + from pydantic import BaseConfig + from pydantic.fields import ModelField + + +class VideoTorchTensor(AbstractVideoTensor, TorchTensor, metaclass=metaTorchAndNode): + """ + Subclass of TorchTensor, to represent a video tensor. + Adds video-specific features to the tensor. 
+ + EXAMPLE USAGE + + """ + + _PROTO_FIELD_NAME = 'video_torch_tensor' + + @classmethod + def validate( + cls: Type[T], + value: Union[T, np.ndarray, List[Any], Tuple[Any], Any], + field: 'ModelField', + config: 'BaseConfig', + ) -> T: + tensor = super().validate(value=value, field=field, config=config) + if tensor.ndim not in [3, 4] or tensor.shape[-1] != 3: + raise ValueError( + f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' + f'to 3, but received {tensor.shape} in {tensor.dtype}' + ) + else: + return tensor + + def to_numpy(self) -> np.ndarray: + return self.cpu().detach().numpy() diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index bcfdec16191..877a17536b3 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -50,15 +50,17 @@ def validate( return cls(str(url), scheme=None) def load( - self: T, only_keyframes: bool = False, **kwargs - ) -> Union[VideoNdArray, Tuple[VideoNdArray, VideoNdArray]]: + self: T, only_keyframes: bool = False, dtype: str = 'int32', **kwargs + ) -> Union[VideoNdArray, Tuple[VideoNdArray, np.ndarray]]: """ - Load the data from the url into a numpy.ndarray. + Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and + np.ndarray. :param only_keyframes: if True keep only the keyframes, if False keep all frames and store the indices of the keyframes in :attr:`.tags` + :param dtype: Data-type of the returned array; default: int32. 
:param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open @@ -86,7 +88,4 @@ def load( if only_keyframes: return frames else: - indices = parse_obj_as( - VideoNdArray, np.ndarray(keyframe_indices, dtype=np.int32) - ) - return frames, indices + return frames, np.ndarray(keyframe_indices, dtype=dtype) diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py new file mode 100644 index 00000000000..77acdab4cd1 --- /dev/null +++ b/tests/integrations/predefined_document/test_video.py @@ -0,0 +1,43 @@ +import os + +import numpy as np +import pytest + +from docarray import Video +from docarray.typing import VideoNdArray +from tests import TOYDATA_DIR + +LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') +REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501 + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) +def test_video(file_url): + video = Video(url=file_url) + video.tensor, video.key_frame_indices = video.url.load() + + assert isinstance(video.tensor, np.ndarray) + assert isinstance(video.tensor, VideoNdArray) + assert isinstance(video.key_frame_indices, np.ndarray) + + +@pytest.mark.slow +@pytest.mark.internet +@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) +def test_save_video_ndarray(file_url, tmpdir): + tmp_file = str(tmpdir / 'tmp.mp4') + + video = Video(url=file_url) + video.tensor, _ = video.url.load() + + assert isinstance(video.tensor, np.ndarray) + assert isinstance(video.tensor, VideoNdArray) + + video.tensor.save_to_file(tmp_file) + assert os.path.isfile(tmp_file) + + video_from_file = Video(url=tmp_file) + video_from_file.tensor = video_from_file.url.load() + assert np.allclose(video.tensor, 
video_from_file.tensor) diff --git a/tests/units/typing/tensor/test_video_tensor.py b/tests/units/typing/tensor/test_video_tensor.py new file mode 100644 index 00000000000..bbc94ddaf4d --- /dev/null +++ b/tests/units/typing/tensor/test_video_tensor.py @@ -0,0 +1,87 @@ +import os + +import numpy as np +import pytest +import torch +from pydantic.tools import parse_obj_as + +from docarray import BaseDocument +from docarray.typing import VideoNdArray, VideoTorchTensor + + +@pytest.mark.parametrize( + 'tensor,cls_video_tensor,cls_tensor', + [ + (torch.zeros(1, 224, 224, 3), VideoTorchTensor, torch.Tensor), + (np.zeros((1, 224, 224, 3)), VideoNdArray, np.ndarray), + ], +) +def test_set_video_tensor(tensor, cls_video_tensor, cls_tensor): + class MyVideoDoc(BaseDocument): + tensor: cls_video_tensor + + doc = MyVideoDoc(tensor=tensor) + + assert isinstance(doc.tensor, cls_video_tensor) + assert isinstance(doc.tensor, cls_tensor) + assert (doc.tensor == tensor).all() + + +@pytest.mark.parametrize( + 'cls_tensor,tensor', + [ + (VideoNdArray, np.zeros((1, 224, 224, 3))), + (VideoTorchTensor, torch.zeros(1, 224, 224, 3)), + (VideoTorchTensor, np.zeros((1, 224, 224, 3))), + ], +) +def test_validation(cls_tensor, tensor): + arr = parse_obj_as(cls_tensor, tensor) + assert isinstance(arr, cls_tensor) + + +@pytest.mark.parametrize( + 'cls_tensor,tensor', + [ + (VideoNdArray, torch.zeros(1, 224, 224, 3)), + (VideoTorchTensor, torch.zeros(224, 3)), + (VideoTorchTensor, torch.zeros(1, 224, 224, 100)), + (VideoNdArray, 'hello'), + (VideoTorchTensor, 'hello'), + ], +) +def test_illegal_validation(cls_tensor, tensor): + match = str(cls_tensor).split('.')[-1][:-2] + with pytest.raises(ValueError, match=match): + parse_obj_as(cls_tensor, tensor) + + +@pytest.mark.parametrize( + 'cls_tensor,tensor,proto_key', + [ + ( + VideoTorchTensor, + torch.zeros(1, 224, 224, 3), + VideoTorchTensor._PROTO_FIELD_NAME, + ), + (VideoNdArray, np.zeros((1, 224, 224, 3)), VideoNdArray._PROTO_FIELD_NAME), + ], 
+) +def test_proto_tensor(cls_tensor, tensor, proto_key): + tensor = parse_obj_as(cls_tensor, tensor) + proto = tensor._to_node_protobuf() + assert str(proto).startswith(proto_key) + + +@pytest.mark.parametrize( + 'cls_tensor,tensor', + [ + (VideoTorchTensor, torch.zeros(1, 224, 224, 3)), + (VideoNdArray, np.zeros((1, 224, 224, 3))), + ], +) +def test_save_video_tensor_to_file(cls_tensor, tensor, tmpdir): + tmp_file = str(tmpdir / 'tmp.mp4') + video_tensor = parse_obj_as(cls_tensor, tensor) + video_tensor.save_to_file(tmp_file) + assert os.path.isfile(tmp_file) From fc869203c3c62c3328ec7d48b9a138e848e155e0 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 4 Jan 2023 10:51:39 +0100 Subject: [PATCH 04/26] fix: mypy checks Signed-off-by: anna-charlotte --- .../typing/tensor/video/abstract_video_tensor.py | 15 +-------------- docarray/typing/url/video_url.py | 8 +++++--- pyproject.toml | 4 ++++ 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index f9134037710..4cb5d7be9b2 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import BinaryIO, Dict, Generator, Optional, Tuple, Type, TypeVar, Union +from typing import BinaryIO, TypeVar, Union import numpy as np @@ -50,16 +50,3 @@ def save_to_file( for packet in stream.encode(): container.mux(packet) - - @classmethod - def generator_from_webcam( - cls: Type['T'], - height_width: Optional[Tuple[int, int]] = None, - show_window: bool = True, - window_title: str = 'webcam', - fps: int = 30, - exit_key: int = 27, - exit_event=None, - tags: Optional[Dict] = None, - ) -> Generator['T', None, None]: - ... 
diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 877a17536b3..e424c1d5935 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -83,9 +83,11 @@ def load( if not only_keyframes and frame.key_frame == 1: keyframe_indices.append(i) - frames = parse_obj_as(VideoNdArray, np.moveaxis(np.stack(frames), 1, 2)) + frames_vid: VideoNdArray = parse_obj_as( + VideoNdArray, np.moveaxis(np.stack(frames), 1, 2) + ) if only_keyframes: - return frames + return frames_vid else: - return frames, np.ndarray(keyframe_indices, dtype=dtype) + return frames_vid, np.ndarray(keyframe_indices, dtype=dtype) diff --git a/pyproject.toml b/pyproject.toml index 1d29532b696..b2ff89bef7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,10 @@ exclude = ['docarray/proto'] plugins = "pydantic.mypy" check_untyped_defs = true +[[tool.mypy.overrides]] +module = "av" +ignore_missing_imports = true + [[tool.mypy.overrides]] module = "trimesh" ignore_missing_imports = true From 8a55e0b99dcb475bb99c8dc88bd4dcf3ed267965 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 4 Jan 2023 12:17:23 +0100 Subject: [PATCH 05/26] chore: add av to video extra Signed-off-by: anna-charlotte --- poetry.lock | 57 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 ++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 5996317fe67..1e95b3fa905 100644 --- a/poetry.lock +++ b/poetry.lock @@ -90,6 +90,14 @@ docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] +[[package]] +name = "av" +version = "10.0.0" +description = "Pythonic bindings for FFmpeg's 
libraries." +category = "main" +optional = false +python-versions = "*" + [[package]] name = "babel" version = "2.11.0" @@ -1668,11 +1676,12 @@ common = ["protobuf"] image = ["pillow", "types-pillow"] mesh = ["trimesh"] torch = ["torch"] +video = ["av"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "b1aa40aea6ec7f56a8c3b511fd2ce96ed217c6fc81d6f8dd931e519cc0774154" +content-hash = "1856e2f5fdadf5b4cbd1100c6593ac71b72323aa1a4704bf40235265019f3424" [metadata.files] anyio = [ @@ -1721,6 +1730,52 @@ attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] +av = [ + {file = "av-10.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d19bb54197155d045a2b683d993026d4bcb06e31c2acad0327e3e8711571899c"}, + {file = "av-10.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7dba96a85cd37315529998e6dbbe3fa05c2344eb19a431dc24996be030a904ee"}, + {file = "av-10.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27d6d38c7c8d46d578c008ffcb8aad1eae14d0621fff41f4ad62395589045fe4"}, + {file = "av-10.0.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51037f4bde03daf924236af4f444e17345792ad7f6f70760a5e5863407e14f2b"}, + {file = "av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0577a38664e453b4ffb63d616a0d23c295827b16ae96a090e89527a753de8718"}, + {file = "av-10.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:07c971573035d22ce50069d3f2bbdb4d6d02d626ab13db12fda3ce519cda3f22"}, + {file = "av-10.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e5085d11345484c0097898994bb3f515002e7e1deeb43dd11d30dd6f45402c49"}, + {file = "av-10.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:157bde3ffd1615a9006b56e4daf3b46848d3ee2bd46b0394f7568e43ed7ab5a9"}, + {file = 
"av-10.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115e144d5a1f205378a4b3a3657b7ed3e45918ebe5d2003a891e45984e8f443a"}, + {file = "av-10.0.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7d6e2b3fbda6464f74fe010dbcff361394bb014b0cb4aa4dc9f2bb713ce882"}, + {file = "av-10.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69fd5a38395191a0f4b71adf31057ff177c9f0762914d73d8797742339ad67d0"}, + {file = "av-10.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:836d69a9543d284976b229cc8d4343ffcfc0bbaf05239e13fb7e613b13d5291d"}, + {file = "av-10.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eba192274538617bbe60097a013d83637f1a5ba9844bbbcf3ca7e43c6499b9d5"}, + {file = "av-10.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1301e4cf1a2c899851073720cd541066c8539b64f9eb0d52216f8d0a59f20429"}, + {file = "av-10.0.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eebd5aa9d8b1e33e715c5409544a712f13ec805bb0110d75f394ff28d2fb64ad"}, + {file = "av-10.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04cd0ce13a87870fb0a0ea4673f04934af2b9ac7ae844eafe92e2c19c092ab11"}, + {file = "av-10.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:10facb5b933551dd6a30d8015bc91eef5d1c864ee86aa3463ffbaff1a99f6c6a"}, + {file = "av-10.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:088636ded03724a2ab51136f6f4be0bc457bdb3c0d2ac7158792fe81150d4c1a"}, + {file = "av-10.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ff0f7d3b1003a9ed0d06038f3f521a5ff0d3e056ec5111e2a78e303f98b815a7"}, + {file = "av-10.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccaf786e747b126a5b3b9a8f5ffbb6a20c5f528775cc7084c95732ca72606fba"}, + {file = "av-10.0.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c579d718b52beb812ea2a7bd68f812d0920b00937804d52d31d41bb71aa5557"}, + {file = 
"av-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2cfd39baa5d82768d2a8898de7bfd450a083ef22b837d57e5dc1b6de3244218"}, + {file = "av-10.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:81b5264d9752f49286bc1dc4d2cc66187418c4948a326dbed837c766c9892139"}, + {file = "av-10.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:16bd82b63d0b4c1b855b3c36b13337f7cdc5925bd8284fab893bdf6c290fc3a9"}, + {file = "av-10.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a6c8f3f8c26d35eefe45b849c81fd0816ba4b6f589baec7357c25b4c5537d3c4"}, + {file = "av-10.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91ea46fea7259abdfabe00b0ed3a9ca18e7fff7ce80d2a2c66a28f797cce838a"}, + {file = "av-10.0.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a62edd533d330aa61902ae8cd82966affa487fa337a0c4f58ae8866ccb5d31c0"}, + {file = "av-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b67b7d028c9cf68215376662fd2e0be6ca0cc02d32d3ed8514fec67b12db9cbd"}, + {file = "av-10.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:0f9c88062ebfd2ce547c522b64f79e487ed2b0a6a9d6693c801b28df0d944607"}, + {file = "av-10.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:63dbafcd02415127d97509523bc285f1ab260988f87b744d7fb1baee6ffbdf96"}, + {file = "av-10.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2ea4424d0be62fe18c843420284a0907bcb38d577062d62c4b75a8e940e6057"}, + {file = "av-10.0.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b6326fd0755761e3ee999e4bf90339e869fe71d548b679fee89157858b8d04a"}, + {file = "av-10.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3fae238751ec0db6377b2106e13762ca84dbe104bd44c1ce9b424163aef4ab5"}, + {file = "av-10.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:86bb3f6e8cce62ad18cd34eb2eadd091d99f51b40be81c929b53fbd8fecf6d90"}, + {file = 
"av-10.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f7b508813abbc100162d305a1ac9b2dd16e5128d56f2ac69639fc6a4b5aca69e"}, + {file = "av-10.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98cc376199c0aa6e9365d03e0f4e67cfb209e40fe9c0cf566372f9daf2a0c779"}, + {file = "av-10.0.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b459ca0ef25c1a0e370112556bdc5b7752f76dc9bd497acaf3e653171e4b946"}, + {file = "av-10.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab930735112c1f788cc4d47c42c59ba0dd214d815aa906e1addf39af91d15194"}, + {file = "av-10.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13fe0b48b9211539323ecebbf84154c86c72d16723c6d0af76e29ae5c3a614b2"}, + {file = "av-10.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2eeec7beaebfe9e2213b3c94b482381187d0afdcb632f93239b44dc668b97df"}, + {file = "av-10.0.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dac2a8b0791c3373270e32f6cd27e6b60628565a188e40a5d9660d3aab05e33"}, + {file = "av-10.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cdede2325cb750b5bf79238bbf06f9c2a70b757b12726003769a43493b7233a"}, + {file = "av-10.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9788e6e15db0910fb8e1548ba7540799d07066177710590a5794a524c4910e05"}, + {file = "av-10.0.0.tar.gz", hash = "sha256:8afd3d5610e1086f3b2d8389d66672ea78624516912c93612de64dcaa4c67e05"}, +] babel = [ {file = "Babel-2.11.0-py3-none-any.whl", hash = "sha256:1ad3eca1c885218f6dce2ab67291178944f810a10a9b5f3cb8382a5a232b64fe"}, {file = "Babel-2.11.0.tar.gz", hash = "sha256:5ef4b3226b0180dedded4229651c8b0e1a3a6a2837d45a073272f313e4cf97f6"}, diff --git a/pyproject.toml b/pyproject.toml index b2ff89bef7d..2ed336c0bcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,11 +17,13 @@ types-pillow = {version = "^9.3.0.1", optional = true } 
trimesh = {version = "^3.17.1", optional = true} typing-inspect = "^0.8.0" types-requests = "^2.28.11.6" +av = "^10.0.0" [tool.poetry.extras] common = ["protobuf"] torch = ["torch"] image = ["pillow", "types-pillow"] +video = ["av"] mesh = ["trimesh"] [tool.poetry.dev-dependencies] From 5cb098a33ed965839c6bc86eede8f3f15f0137b9 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 4 Jan 2023 15:16:34 +0100 Subject: [PATCH 06/26] fix: allow dim 3 Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/abstract_video_tensor.py | 9 ++++++--- docarray/typing/url/video_url.py | 9 +++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index 4cb5d7be9b2..2751f393278 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -33,7 +33,7 @@ def save_to_file( """ np_tensor = self.to_numpy() - video_tensor = np.moveaxis(np.clip(np_tensor, 0, 255), 1, 2).astype('uint8') + video_tensor = np.moveaxis(np.clip(np_tensor, 0, 255), -3, -2).astype('uint8') import av @@ -43,8 +43,11 @@ def save_to_file( stream.height = np_tensor.shape[2] stream.pix_fmt = 'yuv420p' - for b in video_tensor: - frame = av.VideoFrame.from_ndarray(b, format='rgb24') + if video_tensor.ndim == 3: + video_tensor = np.expand_dims(video_tensor, axis=0) + + for vid in video_tensor: + frame = av.VideoFrame.from_ndarray(vid) for packet in stream.encode(frame): container.mux(packet) diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index e424c1d5935..0c705af5917 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -50,7 +50,7 @@ def validate( return cls(str(url), scheme=None) def load( - self: T, only_keyframes: bool = False, dtype: str = 'int32', **kwargs + self: T, only_keyframes: bool = False, **kwargs ) -> Union[VideoNdArray, 
Tuple[VideoNdArray, np.ndarray]]: """ Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and @@ -60,7 +60,6 @@ def load( :param only_keyframes: if True keep only the keyframes, if False keep all frames and store the indices of the keyframes in :attr:`.tags` - :param dtype: Data-type of the returned array; default: int32. :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open @@ -83,11 +82,9 @@ def load( if not only_keyframes and frame.key_frame == 1: keyframe_indices.append(i) - frames_vid: VideoNdArray = parse_obj_as( - VideoNdArray, np.moveaxis(np.stack(frames), 1, 2) - ) + frames_vid = parse_obj_as(VideoNdArray, np.moveaxis(np.stack(frames), -3, -2)) if only_keyframes: return frames_vid else: - return frames_vid, np.ndarray(keyframe_indices, dtype=dtype) + return frames_vid, np.ndarray(keyframe_indices) From 3ba1f788101e061f2b91b9a2b6f398186bef1632 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 5 Jan 2023 14:10:11 +0100 Subject: [PATCH 07/26] test: wip video load and save Signed-off-by: anna-charlotte --- .../tensor/video/abstract_video_tensor.py | 25 ++++----- docarray/typing/url/video_url.py | 11 ++-- .../predefined_document/test_video.py | 53 +++++++++++++++---- 3 files changed, 59 insertions(+), 30 deletions(-) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index 2751f393278..5b89ca2368f 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -19,37 +19,34 @@ def to_numpy(self) -> np.ndarray: def save_to_file( self: 'T', file_path: Union[str, BinaryIO], - frame_rate: int = 30, + frame_rate: int = 24, codec: str = 'h264', ) -> None: """ - Save video tensor to a .wav file. Mono/stereo is preserved. + Save video tensor to a .mp4 file. 
- - :param file_path: path to a .wav file. If file is a string, open the file by + :param file_path: path to a .mp4 file. If file is a string, open the file by that name, otherwise treat it as a file-like object. :param frame_rate: frames per second. :param codec: the name of a decoder/encoder. """ np_tensor = self.to_numpy() - - video_tensor = np.moveaxis(np.clip(np_tensor, 0, 255), -3, -2).astype('uint8') - + print(f"np_tensor[0][:2] = {np_tensor[0][:2]}") + video_tensor = np_tensor.astype('uint8') import av with av.open(file_path, mode='w') as container: - stream = container.add_stream(codec, rate=frame_rate) - stream.width = np_tensor.shape[1] - stream.height = np_tensor.shape[2] - stream.pix_fmt = 'yuv420p' - if video_tensor.ndim == 3: video_tensor = np.expand_dims(video_tensor, axis=0) + stream = container.add_stream(codec, rate=frame_rate) + stream.height = video_tensor.shape[-3] + stream.width = video_tensor.shape[-2] + for vid in video_tensor: - frame = av.VideoFrame.from_ndarray(vid) + frame = av.VideoFrame.from_ndarray(vid, format='rgb24') for packet in stream.encode(frame): container.mux(packet) - for packet in stream.encode(): + for packet in stream.encode(None): container.mux(packet) diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 0c705af5917..932112d8bcd 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -56,8 +56,6 @@ def load( Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and np.ndarray. 
- - :param only_keyframes: if True keep only the keyframes, if False keep all frames and store the indices of the keyframes in :attr:`.tags` :param kwargs: supports all keyword arguments that are being supported by @@ -75,16 +73,17 @@ def load( frames = [] keyframe_indices = [] + for i, frame in enumerate(container.decode(video=0)): - img = frame.to_image() - frames.append(img) + frame_np = frame.to_ndarray(format='rgb24') + frames.append(frame_np) if not only_keyframes and frame.key_frame == 1: keyframe_indices.append(i) - frames_vid = parse_obj_as(VideoNdArray, np.moveaxis(np.stack(frames), -3, -2)) + frames_vid = parse_obj_as(VideoNdArray, np.stack(frames)) if only_keyframes: return frames_vid else: - return frames_vid, np.ndarray(keyframe_indices) + return frames_vid, np.array(keyframe_indices) diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 77acdab4cd1..f31f4ae3b88 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -25,19 +25,52 @@ def test_video(file_url): @pytest.mark.slow @pytest.mark.internet -@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) +@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE]) # , REMOTE_VIDEO_FILE]) def test_save_video_ndarray(file_url, tmpdir): - tmp_file = str(tmpdir / 'tmp.mp4') + tmp_file = str(TOYDATA_DIR / 'tmp.mp4') - video = Video(url=file_url) - video.tensor, _ = video.url.load() + video_1 = Video(url=file_url) + assert video_1.url == file_url - assert isinstance(video.tensor, np.ndarray) - assert isinstance(video.tensor, VideoNdArray) + video_1.tensor, _ = video_1.url.load() + assert isinstance(video_1.tensor, np.ndarray) + assert isinstance(video_1.tensor, VideoNdArray) - video.tensor.save_to_file(tmp_file) + # from PIL import Image + # Image.fromarray(video_1.tensor[0]).show() + + video_1.tensor.save_to_file(tmp_file) assert 
os.path.isfile(tmp_file) + print(f"video_1.tensor[0][:2] = {video_1.tensor[0][:2]}") + + video_2 = Video(url=tmp_file) + video_2.tensor, _ = video_2.url.load() + video_2.tensor.save_to_file(str(TOYDATA_DIR / 'tmp_2.mp4')) + + # video_3 = Video(url=str(tmpdir / f'tmp_2.mp4')) + # video_3.tensor, _ = video_3.url.load() + # video_3.tensor.save_to_file(str(tmpdir / f'tmp_3.mp4')) + # + # video_4 = Video(url=str(tmpdir / f'tmp_3.mp4')) + # video_4.tensor, _ = video_4.url.load() + # video_4.tensor.save_to_file(str(tmpdir / f'tmp_4.mp4')) + # + # video_5 = Video(url=str(tmpdir / f'tmp_4.mp4')) + # video_5.tensor, _ = video_5.url.load() + # video_5.tensor.save_to_file(str(tmpdir / f'tmp_5.mp4')) + # + # video_6 = Video(url=str(tmpdir / f'tmp_5.mp4')) + # video_6.tensor, _ = video_6.url.load() + # video_6.tensor.save_to_file(str(tmpdir / f'tmp_6.mp4')) + # + print(f"video_2.tensor[0][:2] = {video_2.tensor[0][:2]}") + # print(f"video_3.tensor[0][:2] = {video_3.tensor[0][:2]}") + # print(f"video_4.tensor[0][:2] = {video_3.tensor[0][:2]}") + # print(f"video_5.tensor[0][:2] = {video_3.tensor[0][:2]}") + # print(f"video_6.tensor[0][:2] = {video_3.tensor[0][:2]}") - video_from_file = Video(url=tmp_file) - video_from_file.tensor = video_from_file.url.load() - assert np.allclose(video.tensor, video_from_file.tensor) + # Image.fromarray(video_1.tensor[0]).show() + assert isinstance(video_1.tensor, np.ndarray) + assert isinstance(video_1.tensor, VideoNdArray) + assert video_1.tensor.shape == video_2.tensor.shape + assert np.allclose(video_1.tensor, video_2.tensor, atol=100) From be639262a383385a6562dd708e542c08540b7d32 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Fri, 6 Jan 2023 10:11:41 +0100 Subject: [PATCH 08/26] refactor: move to numpy to computational backend Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 13 +++++++++++++ docarray/computation/numpy_backend.py | 4 ++++ docarray/computation/torch_backend.py | 5 +++++ 
.../typing/tensor/video/abstract_video_tensor.py | 12 ++---------- docarray/typing/tensor/video/video_ndarray.py | 3 --- docarray/typing/tensor/video/video_torch_tensor.py | 3 --- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index a58582d658c..7420d7daf29 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod from typing import List, Optional, Tuple, TypeVar, Union +import numpy as np + # In practice all of the below will be the same type TTensor = TypeVar('TTensor') TTensorRetrieval = TypeVar('TTensorRetrieval') @@ -29,6 +31,17 @@ def stack( @staticmethod @abstractmethod def n_dim(array: 'TTensor') -> int: + """ + Get the number of the array dimensions. + """ + ... + + @staticmethod + @abstractmethod + def to_numpy(array: 'TTensor') -> np.ndarray: + """ + Convert array to np.ndarray. + """ ... 
class Retrieval(ABC, typing.Generic[TTensorRetrieval]): diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index d5950a70f17..51f0ea7d3bf 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -44,6 +44,10 @@ def stack( def n_dim(array: 'np.ndarray') -> int: return array.ndim + @staticmethod + def to_numpy(array: 'np.ndarray') -> np.ndarray: + return array + class Retrieval(AbstractComputationalBackend.Retrieval[np.ndarray]): """ Abstract class for retrieval and ranking functionalities diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 52f7ea879c3..402b36350fb 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -1,5 +1,6 @@ from typing import List, Optional, Tuple, Union +import numpy as np import torch from docarray.computation.abstract_comp_backend import AbstractComputationalBackend @@ -43,6 +44,10 @@ def stack( def n_dim(array: 'torch.Tensor') -> int: return array.ndim + @staticmethod + def to_numpy(array: 'torch.Tensor') -> np.ndarray: + return array.cpu().detach().numpy() + class Retrieval(AbstractComputationalBackend.Retrieval[torch.Tensor]): """ Abstract class for retrieval and ranking functionalities diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index 5b89ca2368f..0f4118a5d59 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractmethod +from abc import ABC from typing import BinaryIO, TypeVar, Union import numpy as np @@ -9,13 +9,6 @@ class AbstractVideoTensor(AbstractTensor, ABC): - @abstractmethod - def to_numpy(self) -> np.ndarray: - """ - Convert video tensor to numpy.ndarray. - """ - ... 
- def save_to_file( self: 'T', file_path: Union[str, BinaryIO], @@ -30,8 +23,7 @@ def save_to_file( :param frame_rate: frames per second. :param codec: the name of a decoder/encoder. """ - np_tensor = self.to_numpy() - print(f"np_tensor[0][:2] = {np_tensor[0][:2]}") + np_tensor = self.get_comp_backend().to_numpy(array=self) # type: ignore video_tensor = np_tensor.astype('uint8') import av diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index 5362bb05dc1..10a608ab743 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -38,6 +38,3 @@ def validate( ) else: return array - - def to_numpy(self) -> np.ndarray: - return self diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index 0bc755f8467..05f56bf792d 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -38,6 +38,3 @@ def validate( ) else: return tensor - - def to_numpy(self) -> np.ndarray: - return self.cpu().detach().numpy() From 395a495aaf09e452b9ac7e663dc1f234eaadcbd9 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 09:36:52 +0100 Subject: [PATCH 09/26] fix: video load and save Signed-off-by: anna-charlotte --- docarray/predefined_document/video.py | 10 +++-- .../tensor/video/abstract_video_tensor.py | 45 ++++++++++++++----- docarray/typing/url/video_url.py | 38 +++++++++------- 3 files changed, 62 insertions(+), 31 deletions(-) diff --git a/docarray/predefined_document/video.py b/docarray/predefined_document/video.py index 0536a543d1b..765407f6e75 100644 --- a/docarray/predefined_document/video.py +++ b/docarray/predefined_document/video.py @@ -2,6 +2,7 @@ from docarray.document import BaseDocument from docarray.typing import AnyTensor, Embedding +from docarray.typing.tensor.audio.audio_tensor import AudioTensor from 
docarray.typing.tensor.video.video_tensor import VideoTensor from docarray.typing.url.video_url import VideoUrl @@ -11,9 +12,9 @@ class Video(BaseDocument): """ Document for handling video. - The Video Document can contain a VideoUrl (`Video.url`), a VideoTensor - (`Video.tensor`), an AnyTensor ('Video.key_frame_indices), and an Embedding - (`Video.embedding`). + The Video Document can contain a VideoUrl (`Video.url`), an AudioTensor + (`Video.audio_tensor`), a VideoTensor (`Video.video_tensor`), an AnyTensor + ('Video.key_frame_indices), and an Embedding (`Video.embedding`). EXAMPLE USAGE: @@ -26,6 +27,7 @@ class Video(BaseDocument): """ url: Optional[VideoUrl] - tensor: Optional[VideoTensor] + audio_tensor: Optional[AudioTensor] + video_tensor: Optional[VideoTensor] key_frame_indices: Optional[AnyTensor] embedding: Optional[Embedding] diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index 0f4118a5d59..b5d8a79b26c 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -1,44 +1,65 @@ from abc import ABC -from typing import BinaryIO, TypeVar, Union +from typing import BinaryIO, Optional, TypeVar, Union import numpy as np from docarray.typing.tensor.abstract_tensor import AbstractTensor +from docarray.typing.tensor.audio.audio_tensor import AudioTensor T = TypeVar('T', bound='AbstractVideoTensor') class AbstractVideoTensor(AbstractTensor, ABC): - def save_to_file( + def save_to_mp4_file( self: 'T', file_path: Union[str, BinaryIO], - frame_rate: int = 24, - codec: str = 'h264', + audio_tensor: Optional[AudioTensor] = None, + video_frame_rate: int = 30, + video_codec: str = 'h264', + audio_frame_rate: int = 48000, + audio_codec: str = 'aac', + audio_format: str = 'fltp', ) -> None: """ Save video tensor to a .mp4 file. :param file_path: path to a .mp4 file. 
If file is a string, open the file by that name, otherwise treat it as a file-like object. - :param frame_rate: frames per second. - :param codec: the name of a decoder/encoder. + :param video_frame_rate: frames per second. + :param video_codec: the name of a decoder/encoder. """ + import av + np_tensor = self.get_comp_backend().to_numpy(array=self) # type: ignore video_tensor = np_tensor.astype('uint8') - import av with av.open(file_path, mode='w') as container: if video_tensor.ndim == 3: video_tensor = np.expand_dims(video_tensor, axis=0) - stream = container.add_stream(codec, rate=frame_rate) - stream.height = video_tensor.shape[-3] - stream.width = video_tensor.shape[-2] + stream_video = container.add_stream(video_codec, rate=video_frame_rate) + stream_video.height = video_tensor.shape[-3] + stream_video.width = video_tensor.shape[-2] + + if audio_tensor is not None: + stream_audio = container.add_stream(audio_codec) + audio_np = audio_tensor.get_comp_backend().to_numpy(array=audio_tensor) + audio_layout = 'stereo' if audio_np.shape[-2] == 2 else 'mono' + + for i, audio in enumerate(audio_np): + frame = av.AudioFrame.from_ndarray( + array=audio, format=audio_format, layout=audio_layout + ) + frame.rate = audio_frame_rate + for packet in stream_audio.encode(frame): + container.mux(packet) for vid in video_tensor: frame = av.VideoFrame.from_ndarray(vid, format='rgb24') - for packet in stream.encode(frame): + for packet in stream_video.encode(frame): container.mux(packet) - for packet in stream.encode(None): + for packet in stream_audio.encode(None): + container.mux(packet) + for packet in stream_video.encode(None): container.mux(packet) diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 932112d8bcd..b84e79360ff 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -3,6 +3,7 @@ import numpy as np from pydantic.tools import parse_obj_as +from docarray.typing import AudioNdArray, NdArray from 
docarray.typing.tensor.video import VideoNdArray from docarray.typing.url.any_url import AnyUrl @@ -50,19 +51,20 @@ def validate( return cls(str(url), scheme=None) def load( - self: T, only_keyframes: bool = False, **kwargs - ) -> Union[VideoNdArray, Tuple[VideoNdArray, np.ndarray]]: + self: T, only_keyframes: bool = False, audio_format: str = 'fltp', **kwargs + ) -> Union[VideoNdArray, Tuple[AudioNdArray, VideoNdArray, NdArray]]: """ - Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and - np.ndarray. + Load the data from the url into a VideoNdArray or Tuple of AudioNdArray, + VideoNdArray and NdArray. :param only_keyframes: if True keep only the keyframes, if False keep all frames and store the indices of the keyframes in :attr:`.tags` :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open - :return: np.ndarray representing the audio file content, list of key frame - indices if only_keyframe False. + :return: AudioNdArray representing the audio content, VideoNdArray representing + the images of the video, NdArray of key frame indices if only_keyframe + False, else only VideoNdArray representing the keyframes. 
""" import av @@ -71,19 +73,25 @@ def load( stream = container.streams.video[0] stream.codec_context.skip_frame = 'NONKEY' - frames = [] + audio_frames = [] + video_frames = [] keyframe_indices = [] - for i, frame in enumerate(container.decode(video=0)): + for frame in container.decode(): + if type(frame) == av.audio.frame.AudioFrame: + audio_frames.append(frame.to_ndarray(format=audio_format)) + elif type(frame) == av.video.frame.VideoFrame: + video_frames.append(frame.to_ndarray(format='rgb24')) - frame_np = frame.to_ndarray(format='rgb24') - frames.append(frame_np) - if not only_keyframes and frame.key_frame == 1: - keyframe_indices.append(i) + if not only_keyframes and frame.key_frame == 1: + curr_index = len(video_frames) + keyframe_indices.append(curr_index) - frames_vid = parse_obj_as(VideoNdArray, np.stack(frames)) + video = parse_obj_as(VideoNdArray, np.stack(video_frames)) if only_keyframes: - return frames_vid + return video else: - return frames_vid, np.array(keyframe_indices) + audio = parse_obj_as(AudioNdArray, np.stack(audio_frames)) + indices = parse_obj_as(NdArray, keyframe_indices) + return audio, video, indices From 406ec8084b054abb4932fdeb6258a73e4a02056e Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 09:37:55 +0100 Subject: [PATCH 10/26] test: adjust tests Signed-off-by: anna-charlotte --- .../predefined_document/test_video.py | 56 +++++++++---------- .../units/typing/tensor/test_video_tensor.py | 2 +- tests/units/typing/url/test_video_url.py | 4 +- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index f31f4ae3b88..719f0dc2295 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -4,7 +4,7 @@ import pytest from docarray import Video -from docarray.typing import VideoNdArray +from docarray.typing import AudioNdArray, NdArray, 
VideoNdArray from tests import TOYDATA_DIR LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') @@ -15,62 +15,62 @@ @pytest.mark.internet @pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) def test_video(file_url): - video = Video(url=file_url) - video.tensor, video.key_frame_indices = video.url.load() + vid = Video(url=file_url) + vid.audio_tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() - assert isinstance(video.tensor, np.ndarray) - assert isinstance(video.tensor, VideoNdArray) - assert isinstance(video.key_frame_indices, np.ndarray) + assert isinstance(vid.audio_tensor, AudioNdArray) + assert isinstance(vid.video_tensor, VideoNdArray) + assert isinstance(vid.key_frame_indices, NdArray) @pytest.mark.slow @pytest.mark.internet -@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE]) # , REMOTE_VIDEO_FILE]) +@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) def test_save_video_ndarray(file_url, tmpdir): tmp_file = str(TOYDATA_DIR / 'tmp.mp4') video_1 = Video(url=file_url) assert video_1.url == file_url - video_1.tensor, _ = video_1.url.load() - assert isinstance(video_1.tensor, np.ndarray) - assert isinstance(video_1.tensor, VideoNdArray) + audio_tensor, video_1.video_tensor, _ = video_1.url.load() + assert isinstance(video_1.video_tensor, np.ndarray) + assert isinstance(video_1.video_tensor, VideoNdArray) # from PIL import Image # Image.fromarray(video_1.tensor[0]).show() - video_1.tensor.save_to_file(tmp_file) + video_1.video_tensor.save_to_mp4_file(file_path=tmp_file, audio_tensor=audio_tensor) assert os.path.isfile(tmp_file) - print(f"video_1.tensor[0][:2] = {video_1.tensor[0][:2]}") - - video_2 = Video(url=tmp_file) - video_2.tensor, _ = video_2.url.load() - video_2.tensor.save_to_file(str(TOYDATA_DIR / 'tmp_2.mp4')) - - # video_3 = Video(url=str(tmpdir / f'tmp_2.mp4')) + print(f"\nvideo_1.tensor[0][:2] = {video_1.video_tensor[0][:2]}") + # + # video_2 = Video(url=tmp_file) + # 
video_2.tensor, _ = video_2.url.load() + # video_2.tensor.save_to_file(str(tmpdir / 'tmp_2.mp4')) + # + # video_3 = Video(url=str(tmpdir / f'tmp.mp4')) # video_3.tensor, _ = video_3.url.load() # video_3.tensor.save_to_file(str(tmpdir / f'tmp_3.mp4')) # - # video_4 = Video(url=str(tmpdir / f'tmp_3.mp4')) + # video_4 = Video(url=str(tmpdir / f'tmp.mp4')) # video_4.tensor, _ = video_4.url.load() # video_4.tensor.save_to_file(str(tmpdir / f'tmp_4.mp4')) # - # video_5 = Video(url=str(tmpdir / f'tmp_4.mp4')) + # video_5 = Video(url=str(tmpdir / f'tmp.mp4')) # video_5.tensor, _ = video_5.url.load() # video_5.tensor.save_to_file(str(tmpdir / f'tmp_5.mp4')) # - # video_6 = Video(url=str(tmpdir / f'tmp_5.mp4')) + # video_6 = Video(url=str(tmpdir / f'tmp.mp4')) # video_6.tensor, _ = video_6.url.load() # video_6.tensor.save_to_file(str(tmpdir / f'tmp_6.mp4')) # - print(f"video_2.tensor[0][:2] = {video_2.tensor[0][:2]}") + # print(f"video_2.tensor[0][:2] = {video_2.tensor[0][:2]}") # print(f"video_3.tensor[0][:2] = {video_3.tensor[0][:2]}") # print(f"video_4.tensor[0][:2] = {video_3.tensor[0][:2]}") # print(f"video_5.tensor[0][:2] = {video_3.tensor[0][:2]}") # print(f"video_6.tensor[0][:2] = {video_3.tensor[0][:2]}") - - # Image.fromarray(video_1.tensor[0]).show() - assert isinstance(video_1.tensor, np.ndarray) - assert isinstance(video_1.tensor, VideoNdArray) - assert video_1.tensor.shape == video_2.tensor.shape - assert np.allclose(video_1.tensor, video_2.tensor, atol=100) + # + # # Image.fromarray(video_1.tensor[0]).show() + # assert isinstance(video_1.tensor, np.ndarray) + # assert isinstance(video_1.tensor, VideoNdArray) + # # assert video_1.tensor.shape == video_2.tensor.shape + # # assert np.allclose(video_1.tensor, video_2.tensor) diff --git a/tests/units/typing/tensor/test_video_tensor.py b/tests/units/typing/tensor/test_video_tensor.py index bbc94ddaf4d..08b6d5847d7 100644 --- a/tests/units/typing/tensor/test_video_tensor.py +++ 
b/tests/units/typing/tensor/test_video_tensor.py @@ -83,5 +83,5 @@ def test_proto_tensor(cls_tensor, tensor, proto_key): def test_save_video_tensor_to_file(cls_tensor, tensor, tmpdir): tmp_file = str(tmpdir / 'tmp.mp4') video_tensor = parse_obj_as(cls_tensor, tensor) - video_tensor.save_to_file(tmp_file) + video_tensor.save_to_mp4_file(tmp_file) assert os.path.isfile(tmp_file) diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index 39ad487e8fc..b59622390c8 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -6,7 +6,7 @@ from docarray import BaseDocument from docarray.document.io.json import orjson_dumps -from docarray.typing import VideoNdArray, VideoTorchTensor, VideoUrl +from docarray.typing import NdArray, VideoNdArray, VideoTorchTensor, VideoUrl from tests import TOYDATA_DIR LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') @@ -27,7 +27,7 @@ def test_load_with_only_keyframes_false(file_url): assert isinstance(tensor, VideoNdArray) assert isinstance(indices, np.ndarray) - assert isinstance(indices, VideoNdArray) + assert isinstance(indices, NdArray) @pytest.mark.slow From 091e79ae88314220d9d54a842029adc387750c13 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 14:11:29 +0100 Subject: [PATCH 11/26] fix: video load and save and add docstrings Signed-off-by: anna-charlotte --- .../tensor/video/abstract_video_tensor.py | 46 ++++++++++++-- docarray/typing/url/audio_url.py | 2 +- docarray/typing/url/video_url.py | 62 +++++++++++++++++-- .../predefined_document/test_video.py | 56 ----------------- .../units/typing/tensor/test_video_tensor.py | 36 +++++++++-- tests/units/typing/url/test_video_url.py | 23 ++++--- 6 files changed, 144 insertions(+), 81 deletions(-) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index b5d8a79b26c..aa3b0c4951c 100644 --- 
a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -13,9 +13,9 @@ class AbstractVideoTensor(AbstractTensor, ABC): def save_to_mp4_file( self: 'T', file_path: Union[str, BinaryIO], - audio_tensor: Optional[AudioTensor] = None, - video_frame_rate: int = 30, + video_frame_rate: int = 24, video_codec: str = 'h264', + audio_tensor: Optional[AudioTensor] = None, audio_frame_rate: int = 48000, audio_codec: str = 'aac', audio_format: str = 'fltp', @@ -25,8 +25,40 @@ def save_to_mp4_file( :param file_path: path to a .mp4 file. If file is a string, open the file by that name, otherwise treat it as a file-like object. - :param video_frame_rate: frames per second. - :param video_codec: the name of a decoder/encoder. + :param video_frame_rate: video frames per second. + :param video_codec: the name of a video decoder/encoder. + :param audio_tensor: AudioTensor that should be added as soundtrack. + :param audio_frame_rate: audio frames per second. + :param audio_codec: the name of an audio decoder/encoder. + :param audio_format: the name of one of the audio formats supported by PyAV, + such as 'flt', 'fltp', 's16' or 's16p'. + + EXAMPLE USAGE + + .. 
code-block:: python + import numpy as np + + from docarray import BaseDocument + from docarray.typing.tensor.audio.audio_tensor import AudioTensor + from docarray.typing.tensor.video.video_tensor import VideoTensor + + + class MyDoc(BaseDocument): + video_tensor: VideoTensor + audio_tensor: AudioTensor + + + doc = MyDoc( + video_tensor=np.random.randint(low=0, high=256, size=(10, 200, 300, 3)), + audio_tensor=np.random.randn(100, 1, 1024).astype("float32"), + ) + + doc.video_tensor.save_to_mp4_file( + file_path="toydata/mp_.mp4", + audio_tensor=doc.audio_tensor, + audio_format="flt", + ) + """ import av @@ -51,15 +83,17 @@ def save_to_mp4_file( array=audio, format=audio_format, layout=audio_layout ) frame.rate = audio_frame_rate + frame.pts = audio.shape[-1] * i for packet in stream_audio.encode(frame): container.mux(packet) + for packet in stream_audio.encode(None): + container.mux(packet) + for vid in video_tensor: frame = av.VideoFrame.from_ndarray(vid, format='rgb24') for packet in stream_video.encode(frame): container.mux(packet) - for packet in stream_audio.encode(None): - container.mux(packet) for packet in stream_video.encode(None): container.mux(packet) diff --git a/docarray/typing/url/audio_url.py b/docarray/typing/url/audio_url.py index 6e9e25a7e7e..1646b4eb0e0 100644 --- a/docarray/typing/url/audio_url.py +++ b/docarray/typing/url/audio_url.py @@ -62,7 +62,7 @@ def load(self: T, dtype: str = 'float32') -> AudioNdArray: .. 
code-block:: python - from docarray import Document + from docarray import BaseDocument import numpy as np from docarray.typing import AudioUrl diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index b84e79360ff..c660abda121 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -51,20 +51,69 @@ def validate( return cls(str(url), scheme=None) def load( - self: T, only_keyframes: bool = False, audio_format: str = 'fltp', **kwargs + self: T, only_keyframes: bool = False, **kwargs ) -> Union[VideoNdArray, Tuple[AudioNdArray, VideoNdArray, NdArray]]: """ Load the data from the url into a VideoNdArray or Tuple of AudioNdArray, VideoNdArray and NdArray. - :param only_keyframes: if True keep only the keyframes, if False keep all frames - and store the indices of the keyframes in :attr:`.tags` + :param only_keyframes: if True keep only the keyframes, if False return all + frames, key frame indices and audio. :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open + :return: AudioNdArray representing the audio content, VideoNdArray representing the images of the video, NdArray of key frame indices if only_keyframe False, else only VideoNdArray representing the keyframes. + + + EXAMPLE USAGE + + .. 
code-block:: python + + from typing import Optional + + from docarray import BaseDocument + + from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray + + + class MyDoc(BaseDocument): + video_url: VideoUrl + video: Optional[VideoNdArray] + audio: Optional[AudioNdArray] + key_frame_indices: Optional[NdArray] + + + doc = MyDoc(video_url='toydata/mov_bbb.mp4') + doc.audio, doc.video, doc.key_frame_indices = doc.video_url.load() + + assert isinstance(doc.video, VideoNdArray) + assert isinstance(doc.audio, AudioNdArray) + assert isinstance(doc.key_frame_indices, NdArray) + + You can load only the key frames: + + .. code-block:: python + + from typing import Optional + + from docarray import BaseDocument + + from docarray.typing import VideoUrl, VideoNdArray + + + class MyDoc(BaseDocument): + video_url: VideoUrl + video_key_frames: Optional[VideoNdArray] + + + doc = MyDoc(video_url='toydata/mov_bbb.mp4') + doc.video_key_frames = doc.video_url.load(only_keyframes=True) + + assert isinstance(doc.video_key_frames, VideoNdArray) + """ import av @@ -79,7 +128,7 @@ def load( for frame in container.decode(): if type(frame) == av.audio.frame.AudioFrame: - audio_frames.append(frame.to_ndarray(format=audio_format)) + audio_frames.append(frame.to_ndarray()) elif type(frame) == av.video.frame.VideoFrame: video_frames.append(frame.to_ndarray(format='rgb24')) @@ -92,6 +141,9 @@ def load( if only_keyframes: return video else: - audio = parse_obj_as(AudioNdArray, np.stack(audio_frames)) + if len(audio_frames) == 0: + audio = parse_obj_as(AudioNdArray, np.array(audio_frames)) + else: + audio = parse_obj_as(AudioNdArray, np.stack(audio_frames)) indices = parse_obj_as(NdArray, keyframe_indices) return audio, video, indices diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 719f0dc2295..09e9795fce6 100644 --- a/tests/integrations/predefined_document/test_video.py +++ 
b/tests/integrations/predefined_document/test_video.py @@ -1,6 +1,3 @@ -import os - -import numpy as np import pytest from docarray import Video @@ -21,56 +18,3 @@ def test_video(file_url): assert isinstance(vid.audio_tensor, AudioNdArray) assert isinstance(vid.video_tensor, VideoNdArray) assert isinstance(vid.key_frame_indices, NdArray) - - -@pytest.mark.slow -@pytest.mark.internet -@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) -def test_save_video_ndarray(file_url, tmpdir): - tmp_file = str(TOYDATA_DIR / 'tmp.mp4') - - video_1 = Video(url=file_url) - assert video_1.url == file_url - - audio_tensor, video_1.video_tensor, _ = video_1.url.load() - assert isinstance(video_1.video_tensor, np.ndarray) - assert isinstance(video_1.video_tensor, VideoNdArray) - - # from PIL import Image - # Image.fromarray(video_1.tensor[0]).show() - - video_1.video_tensor.save_to_mp4_file(file_path=tmp_file, audio_tensor=audio_tensor) - assert os.path.isfile(tmp_file) - print(f"\nvideo_1.tensor[0][:2] = {video_1.video_tensor[0][:2]}") - # - # video_2 = Video(url=tmp_file) - # video_2.tensor, _ = video_2.url.load() - # video_2.tensor.save_to_file(str(tmpdir / 'tmp_2.mp4')) - # - # video_3 = Video(url=str(tmpdir / f'tmp.mp4')) - # video_3.tensor, _ = video_3.url.load() - # video_3.tensor.save_to_file(str(tmpdir / f'tmp_3.mp4')) - # - # video_4 = Video(url=str(tmpdir / f'tmp.mp4')) - # video_4.tensor, _ = video_4.url.load() - # video_4.tensor.save_to_file(str(tmpdir / f'tmp_4.mp4')) - # - # video_5 = Video(url=str(tmpdir / f'tmp.mp4')) - # video_5.tensor, _ = video_5.url.load() - # video_5.tensor.save_to_file(str(tmpdir / f'tmp_5.mp4')) - # - # video_6 = Video(url=str(tmpdir / f'tmp.mp4')) - # video_6.tensor, _ = video_6.url.load() - # video_6.tensor.save_to_file(str(tmpdir / f'tmp_6.mp4')) - # - # print(f"video_2.tensor[0][:2] = {video_2.tensor[0][:2]}") - # print(f"video_3.tensor[0][:2] = {video_3.tensor[0][:2]}") - # print(f"video_4.tensor[0][:2] = 
{video_3.tensor[0][:2]}") - # print(f"video_5.tensor[0][:2] = {video_3.tensor[0][:2]}") - # print(f"video_6.tensor[0][:2] = {video_3.tensor[0][:2]}") - # - # # Image.fromarray(video_1.tensor[0]).show() - # assert isinstance(video_1.tensor, np.ndarray) - # assert isinstance(video_1.tensor, VideoNdArray) - # # assert video_1.tensor.shape == video_2.tensor.shape - # # assert np.allclose(video_1.tensor, video_2.tensor) diff --git a/tests/units/typing/tensor/test_video_tensor.py b/tests/units/typing/tensor/test_video_tensor.py index 08b6d5847d7..99ec0454fa1 100644 --- a/tests/units/typing/tensor/test_video_tensor.py +++ b/tests/units/typing/tensor/test_video_tensor.py @@ -6,7 +6,12 @@ from pydantic.tools import parse_obj_as from docarray import BaseDocument -from docarray.typing import VideoNdArray, VideoTorchTensor +from docarray.typing import ( + AudioNdArray, + AudioTorchTensor, + VideoNdArray, + VideoTorchTensor, +) @pytest.mark.parametrize( @@ -74,14 +79,33 @@ def test_proto_tensor(cls_tensor, tensor, proto_key): @pytest.mark.parametrize( - 'cls_tensor,tensor', + 'video_tensor', [ - (VideoTorchTensor, torch.zeros(1, 224, 224, 3)), - (VideoNdArray, np.zeros((1, 224, 224, 3))), + parse_obj_as(VideoTorchTensor, torch.zeros(1, 224, 224, 3)), + parse_obj_as(VideoNdArray, np.zeros((1, 224, 224, 3))), ], ) -def test_save_video_tensor_to_file(cls_tensor, tensor, tmpdir): +def test_save_video_tensor_to_file(video_tensor, tmpdir): tmp_file = str(tmpdir / 'tmp.mp4') - video_tensor = parse_obj_as(cls_tensor, tensor) video_tensor.save_to_mp4_file(tmp_file) assert os.path.isfile(tmp_file) + + +@pytest.mark.parametrize( + 'video_tensor', + [ + parse_obj_as(VideoTorchTensor, torch.zeros(1, 224, 224, 3)), + parse_obj_as(VideoNdArray, np.zeros((1, 224, 224, 3))), + ], +) +@pytest.mark.parametrize( + 'audio_tensor', + [ + parse_obj_as(AudioTorchTensor, torch.randn(100, 1, 1024).to(torch.float32)), + parse_obj_as(AudioNdArray, np.random.randn(100, 1, 1024).astype('float32')), + ], +) 
+def test_save_video_tensor_to_file_including_audio(video_tensor, audio_tensor, tmpdir): + tmp_file = str(tmpdir / 'tmp.mp4') + video_tensor.save_to_mp4_file(tmp_file, audio_tensor=audio_tensor) + assert os.path.isfile(tmp_file) diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index b59622390c8..40882b11221 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -6,7 +6,13 @@ from docarray import BaseDocument from docarray.document.io.json import orjson_dumps -from docarray.typing import NdArray, VideoNdArray, VideoTorchTensor, VideoUrl +from docarray.typing import ( + AudioNdArray, + NdArray, + VideoNdArray, + VideoTorchTensor, + VideoUrl, +) from tests import TOYDATA_DIR LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4') @@ -21,10 +27,13 @@ ) def test_load_with_only_keyframes_false(file_url): url = parse_obj_as(VideoUrl, file_url) - tensor, indices = url.load(only_keyframes=False) + audio, video, indices = url.load(only_keyframes=False) + + assert isinstance(audio, np.ndarray) + assert isinstance(audio, AudioNdArray) - assert isinstance(tensor, np.ndarray) - assert isinstance(tensor, VideoNdArray) + assert isinstance(video, np.ndarray) + assert isinstance(video, VideoNdArray) assert isinstance(indices, np.ndarray) assert isinstance(indices, NdArray) @@ -38,10 +47,10 @@ def test_load_with_only_keyframes_false(file_url): ) def test_load_with_only_keyframes_true(file_url): url = parse_obj_as(VideoUrl, file_url) - tensor = url.load(only_keyframes=True) + key_frames = url.load(only_keyframes=True) - assert isinstance(tensor, np.ndarray) - assert isinstance(tensor, VideoNdArray) + assert isinstance(key_frames, np.ndarray) + assert isinstance(key_frames, VideoNdArray) @pytest.mark.slow From e4106a8dcec6f4353f1c49b85945b6b674b91519 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 15:29:45 +0100 Subject: [PATCH 12/26] fix: fix some imports after merging 
Signed-off-by: anna-charlotte --- docarray/documents/__init__.py | 3 ++- tests/integrations/predefined_document/test_video.py | 2 +- tests/units/typing/url/test_video_url.py | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docarray/documents/__init__.py b/docarray/documents/__init__.py index 31f2313de4b..052992fc1f6 100644 --- a/docarray/documents/__init__.py +++ b/docarray/documents/__init__.py @@ -3,5 +3,6 @@ from docarray.documents.mesh import Mesh3D from docarray.documents.point_cloud import PointCloud3D from docarray.documents.text import Text +from docarray.documents.video import Video -__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D'] +__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D', 'Video'] diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 09e9795fce6..2522ba2801e 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -1,6 +1,6 @@ import pytest -from docarray import Video +from docarray.documents import Video from docarray.typing import AudioNdArray, NdArray, VideoNdArray from tests import TOYDATA_DIR diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index 40882b11221..b468160b6ee 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -2,10 +2,11 @@ import numpy as np import pytest +import torch from pydantic.tools import parse_obj_as, schema_json_of from docarray import BaseDocument -from docarray.document.io.json import orjson_dumps +from docarray.base_document.io.json import orjson_dumps from docarray.typing import ( AudioNdArray, NdArray, @@ -67,8 +68,8 @@ class MyVideoDoc(BaseDocument): doc = MyVideoDoc(video_url=file_url) doc.tensor = doc.video_url.load(only_keyframes=True) - assert isinstance(doc.tensor, np.ndarray) - assert isinstance(doc.tensor, 
VideoNdArray) + assert isinstance(doc.tensor, torch.Tensor) + assert isinstance(doc.tensor, VideoTorchTensor) def test_json_schema(): From 23ee9308e972b4bc37c5a63c23c0c9e65e880c6d Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 16:30:09 +0100 Subject: [PATCH 13/26] docs: add doc strings and fix example urls Signed-off-by: anna-charlotte --- docarray/documents/audio.py | 8 +++--- docarray/documents/video.py | 51 +++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/docarray/documents/audio.py b/docarray/documents/audio.py index c543a0778fb..776020bc964 100644 --- a/docarray/documents/audio.py +++ b/docarray/documents/audio.py @@ -24,7 +24,7 @@ class Audio(BaseDocument): # use it directly audio = Audio( - url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' ) audio.tensor = audio.url.load() model = MyEmbeddingModel() @@ -43,12 +43,12 @@ class MyAudio(Audio): audio = MyAudio( - url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' ) audio.tensor = audio.url.load() model = MyEmbeddingModel() audio.embedding = model(audio.tensor) - audio.name = 'my first audio' + audio.name = Text(text='my first audio') You can use this Document for composition: @@ -66,7 +66,7 @@ class MultiModalDoc(Document): mmdoc = MultiModalDoc( audio=Audio( - url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true' + url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true' ), text=Text(text='hello world, how are you doing?'), ) diff --git a/docarray/documents/video.py b/docarray/documents/video.py index dffac71efd6..e085e1d43bd 100644 --- a/docarray/documents/video.py 
+++ b/docarray/documents/video.py @@ -20,10 +20,61 @@ class Video(BaseDocument): You can use this Document directly: + .. code-block:: python + + from docarray.documents import Video + + # use it directly + vid = Video( + url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true' + ) + vid.audio_tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() + model = MyEmbeddingModel() + vid.embedding = model(vid.video_tensor) + You can extend this Document: + .. code-block:: python + + from typing import Optional + + from docarray.documents import Text, Video + + + # extend it + class MyVideo(Video): + name: Optional[Text] + + + video = MyVideo( + url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + ) + video.video_tensor = video.url.load(only_keyframes=True) + model = MyEmbeddingModel() + video.embedding = model(video.video_tensor) + video.name = Text(text='my first video') + You can use this Document for composition: + .. 
code-block:: python + + from docarray import BaseDocument + from docarray.documents import Text, Video + + + # compose it + class MultiModalDoc(BaseDocument): + video: Video + text: Text + + + mmdoc = MultiModalDoc( + video=Video( + url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' + ), + text=Text(text='hello world, how are you doing?'), + ) + mmdoc.video.video_tensor = mmdoc.video.url.load(only_keyframes=True) """ url: Optional[VideoUrl] From 7ab8dbd41af0d7ef7bb1a8b89a07d5b2e81af4ce Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 16:49:01 +0100 Subject: [PATCH 14/26] docs: small fixes in docs Signed-off-by: anna-charlotte --- docarray/__init__.py | 5 ++++- docarray/computation/abstract_comp_backend.py | 7 ++++--- docarray/computation/numpy_backend.py | 2 +- docarray/computation/torch_backend.py | 2 +- docarray/documents/video.py | 3 ++- docarray/typing/tensor/video/abstract_video_tensor.py | 4 ++-- docarray/typing/url/video_url.py | 8 ++++++-- 7 files changed, 20 insertions(+), 11 deletions(-) diff --git a/docarray/__init__.py b/docarray/__init__.py index ae8a65b853e..54eb3a3bdf7 100644 --- a/docarray/__init__.py +++ b/docarray/__init__.py @@ -3,4 +3,7 @@ from docarray.array.array import DocumentArray from docarray.base_document.document import BaseDocument -__all__ = ['BaseDocument', 'DocumentArray'] +__all__ = [ + 'BaseDocument', + 'DocumentArray', +] diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index 97660398e4f..4a213777b11 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -1,8 +1,9 @@ import typing from abc import ABC, abstractmethod -from typing import List, Optional, Tuple, TypeVar, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, TypeVar, Union -import numpy as np +if TYPE_CHECKING: + import numpy as np # In practice all of the below will be the 
same type TTensor = TypeVar('TTensor') @@ -38,7 +39,7 @@ def n_dim(array: 'TTensor') -> int: @staticmethod @abstractmethod - def to_numpy(array: 'TTensor') -> np.ndarray: + def to_numpy(array: 'TTensor') -> 'np.ndarray': """ Convert array to np.ndarray. """ diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index 05e1186f6d3..c8b3745bbdb 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -65,7 +65,7 @@ def n_dim(array: 'np.ndarray') -> int: return array.ndim @staticmethod - def to_numpy(array: 'np.ndarray') -> np.ndarray: + def to_numpy(array: 'np.ndarray') -> 'np.ndarray': return array @staticmethod diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 4e9fcf59b7e..fe2fe4a5266 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -66,7 +66,7 @@ def n_dim(array: 'torch.Tensor') -> int: return array.ndim @staticmethod - def to_numpy(array: 'torch.Tensor') -> np.ndarray: + def to_numpy(array: 'torch.Tensor') -> 'np.ndarray': return array.cpu().detach().numpy() @staticmethod diff --git a/docarray/documents/video.py b/docarray/documents/video.py index e085e1d43bd..99ec9733d6e 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -14,7 +14,8 @@ class Video(BaseDocument): Document for handling video. The Video Document can contain a VideoUrl (`Video.url`), an AudioTensor (`Video.audio_tensor`), a VideoTensor (`Video.video_tensor`), an AnyTensor - ('Video.key_frame_indices), and an AnyEmbedding (`Video.embedding`). + representing the indices of the video's key frames (`Video.key_frame_indices`), + and an AnyEmbedding (`Video.embedding`). 
EXAMPLE USAGE: diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index aa3b0c4951c..36a2f53a413 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -13,9 +13,9 @@ class AbstractVideoTensor(AbstractTensor, ABC): def save_to_mp4_file( self: 'T', file_path: Union[str, BinaryIO], + audio_tensor: Optional[AudioTensor] = None, video_frame_rate: int = 24, video_codec: str = 'h264', - audio_tensor: Optional[AudioTensor] = None, audio_frame_rate: int = 48000, audio_codec: str = 'aac', audio_format: str = 'fltp', @@ -25,9 +25,9 @@ def save_to_mp4_file( :param file_path: path to a .mp4 file. If file is a string, open the file by that name, otherwise treat it as a file-like object. + :param audio_tensor: AudioTensor containing the video's soundtrack. :param video_frame_rate: video frames per second. :param video_codec: the name of a video decoder/encoder. - :param audio_tensor: AudioTensor that should be added as soundtrack. :param audio_frame_rate: audio frames per second. :param audio_codec: the name of an audio decoder/encoder. 
:param audio_format: the name of one of the audio formats supported by PyAV, diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index c660abda121..96b085ab43f 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -86,7 +86,9 @@ class MyDoc(BaseDocument): key_frame_indices: Optional[NdArray] - doc = MyDoc(video_url='toydata/mov_bbb.mp4') + doc = MyDoc( + video_url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true' + ) doc.audio, doc.video, doc.key_frame_indices = doc.video_url.load() assert isinstance(doc.video, VideoNdArray) @@ -109,7 +111,9 @@ class MyDoc(BaseDocument): video_key_frames: Optional[VideoNdArray] - doc = MyDoc(video_url='toydata/mov_bbb.mp4') + doc = MyDoc( + video_url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true' + ) doc.video_key_frames = doc.video_url.load(only_keyframes=True) assert isinstance(doc.video_key_frames, VideoNdArray) From 5295dd1f783490001527f93d02e1adf39fac2ffc Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 11 Jan 2023 17:44:48 +0100 Subject: [PATCH 15/26] refactor: rename save to mp4 file to save Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/abstract_video_tensor.py | 4 ++-- tests/units/typing/tensor/test_video_tensor.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/abstract_video_tensor.py index 36a2f53a413..8916902fb74 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/abstract_video_tensor.py @@ -10,7 +10,7 @@ class AbstractVideoTensor(AbstractTensor, ABC): - def save_to_mp4_file( + def save( self: 'T', file_path: Union[str, BinaryIO], audio_tensor: Optional[AudioTensor] = None, @@ -53,7 +53,7 @@ class MyDoc(BaseDocument): audio_tensor=np.random.randn(100, 1, 1024).astype("float32"), ) - 
doc.video_tensor.save_to_mp4_file( + doc.video_tensor.save( file_path="toydata/mp_.mp4", audio_tensor=doc.audio_tensor, audio_format="flt", diff --git a/tests/units/typing/tensor/test_video_tensor.py b/tests/units/typing/tensor/test_video_tensor.py index 99ec0454fa1..214fcdf6e12 100644 --- a/tests/units/typing/tensor/test_video_tensor.py +++ b/tests/units/typing/tensor/test_video_tensor.py @@ -87,7 +87,7 @@ def test_proto_tensor(cls_tensor, tensor, proto_key): ) def test_save_video_tensor_to_file(video_tensor, tmpdir): tmp_file = str(tmpdir / 'tmp.mp4') - video_tensor.save_to_mp4_file(tmp_file) + video_tensor.save(tmp_file) assert os.path.isfile(tmp_file) @@ -107,5 +107,5 @@ def test_save_video_tensor_to_file(video_tensor, tmpdir): ) def test_save_video_tensor_to_file_including_audio(video_tensor, audio_tensor, tmpdir): tmp_file = str(tmpdir / 'tmp.mp4') - video_tensor.save_to_mp4_file(tmp_file, audio_tensor=audio_tensor) + video_tensor.save(tmp_file, audio_tensor=audio_tensor) assert os.path.isfile(tmp_file) From b3f2ccb2d00bad3da52c3a94168be54697227dc5 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 13:44:51 +0100 Subject: [PATCH 16/26] feat: add shape method to comp backend Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 8 ++++++ docarray/computation/numpy_backend.py | 4 +++ docarray/computation/torch_backend.py | 4 +++ .../numpy_backend/test_basics.py | 25 ++++++++++++++++++ .../torch_backend/test_basics.py | 26 +++++++++++++++++++ 5 files changed, 67 insertions(+) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index 4a213777b11..f5cac384e47 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -37,6 +37,14 @@ def n_dim(array: 'TTensor') -> int: """ ... + @staticmethod + @abstractmethod + def shape(array: 'TTensor') -> Tuple: + """ + Get the shape of the array. + """ + ... 
+ @staticmethod @abstractmethod def to_numpy(array: 'TTensor') -> 'np.ndarray': diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index c8b3745bbdb..b02c050aee5 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -64,6 +64,10 @@ def to_device( def n_dim(array: 'np.ndarray') -> int: return array.ndim + @staticmethod + def shape(array: 'np.ndarray') -> Tuple: + return array.shape + @staticmethod def to_numpy(array: 'np.ndarray') -> 'np.ndarray': return array diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index fe2fe4a5266..176f887d6ae 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -65,6 +65,10 @@ def to_device( def n_dim(array: 'torch.Tensor') -> int: return array.ndim + @staticmethod + def shape(array: 'torch.Tensor') -> Tuple: + return array.size() + @staticmethod def to_numpy(array: 'torch.Tensor') -> 'np.ndarray': return array.cpu().detach().numpy() diff --git a/tests/units/computation_backends/numpy_backend/test_basics.py b/tests/units/computation_backends/numpy_backend/test_basics.py index 1873889f3a5..29ed9ec001a 100644 --- a/tests/units/computation_backends/numpy_backend/test_basics.py +++ b/tests/units/computation_backends/numpy_backend/test_basics.py @@ -7,3 +7,28 @@ def test_to_device(): with pytest.raises(NotImplementedError): NumpyCompBackend.to_device(np.random.rand(10, 3), 'meta') + + +@pytest.mark.parametrize( + 'array,result', + [ + (np.zeros((5)), 1), + (np.zeros((1, 5)), 2), + (np.zeros((5, 5)), 2), + (np.zeros(()), 0), + ], +) +def test_n_dim(array, result): + assert NumpyCompBackend.n_dim(array) == result + + +@pytest.mark.parametrize( + 'array,result', + [ + (np.zeros((10,)), (10,)), + (np.zeros((5, 5)), (5, 5)), + (np.zeros(()), ()), + ], +) +def test_shape(array, result): + assert NumpyCompBackend.shape(array) == result diff --git 
a/tests/units/computation_backends/torch_backend/test_basics.py b/tests/units/computation_backends/torch_backend/test_basics.py index 14f337df429..a98cca72a84 100644 --- a/tests/units/computation_backends/torch_backend/test_basics.py +++ b/tests/units/computation_backends/torch_backend/test_basics.py @@ -1,3 +1,4 @@ +import pytest import torch from docarray.computation.torch_backend import TorchCompBackend @@ -8,3 +9,28 @@ def test_to_device(): assert t.device == torch.device('cpu') t = TorchCompBackend.to_device(t, 'meta') assert t.device == torch.device('meta') + + +@pytest.mark.parametrize( + 'array,result', + [ + (torch.zeros((5)), 1), + (torch.zeros((1, 5)), 2), + (torch.zeros((5, 5)), 2), + (torch.zeros(()), 0), + ], +) +def test_n_dim(array, result): + assert TorchCompBackend.n_dim(array) == result + + +@pytest.mark.parametrize( + 'array,result', + [ + (torch.zeros((10,)), (10,)), + (torch.zeros((5, 5)), (5, 5)), + (torch.zeros(()), ()), + ], +) +def test_shape(array, result): + assert TorchCompBackend.shape(array) == result From 20ecf2cd59f86651ecdf26a660389c6b3573115c Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 13:48:01 +0100 Subject: [PATCH 17/26] refactor: move validate shape to video tensor mixin Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/video_ndarray.py | 14 +++------ ..._video_tensor.py => video_tensor_mixin.py} | 29 +++++++++++++++---- .../typing/tensor/video/video_torch_tensor.py | 12 ++------ 3 files changed, 31 insertions(+), 24 deletions(-) rename docarray/typing/tensor/video/{abstract_video_tensor.py => video_tensor_mixin.py} (81%) diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index 10a608ab743..97c07afbdca 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -3,7 +3,7 @@ import numpy as np from docarray.typing.tensor.ndarray import NdArray -from 
docarray.typing.tensor.video.abstract_video_tensor import AbstractVideoTensor +from docarray.typing.tensor.video.video_tensor_mixin import VideoTensorMixin T = TypeVar('T', bound='VideoNdArray') @@ -12,7 +12,7 @@ from pydantic.fields import ModelField -class VideoNdArray(AbstractVideoTensor, NdArray): +class VideoNdArray(NdArray, VideoTensorMixin): """ Subclass of NdArray, to represent a video tensor. Adds video-specific features to the tensor. @@ -30,11 +30,5 @@ def validate( field: 'ModelField', config: 'BaseConfig', ) -> T: - array = super().validate(value=value, field=field, config=config) - if array.ndim not in [3, 4] or array.shape[-1] != 3: - raise ValueError( - f'Expects tensor with 3 or 4 dimensions and the last dimension equal' - f' to 3, but received {array.shape} in {array.dtype}' - ) - else: - return array + tensor = super().validate(value=value, field=field, config=config) + return VideoTensorMixin.validate_shape(cls, value=tensor) diff --git a/docarray/typing/tensor/video/abstract_video_tensor.py b/docarray/typing/tensor/video/video_tensor_mixin.py similarity index 81% rename from docarray/typing/tensor/video/abstract_video_tensor.py rename to docarray/typing/tensor/video/video_tensor_mixin.py index 8916902fb74..01d5184f824 100644 --- a/docarray/typing/tensor/video/abstract_video_tensor.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -1,15 +1,34 @@ -from abc import ABC -from typing import BinaryIO, Optional, TypeVar, Union +from typing import TYPE_CHECKING, BinaryIO, Optional, Type, TypeVar, Union import numpy as np -from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.typing.tensor.audio.audio_tensor import AudioTensor -T = TypeVar('T', bound='AbstractVideoTensor') +if TYPE_CHECKING: + from docarray.typing import VideoNdArray, VideoTorchTensor -class AbstractVideoTensor(AbstractTensor, ABC): +T = TypeVar('T', bound='VideoTensorMixin') + + +class VideoTensorMixin: + @staticmethod + def validate_shape( + cls: 
Union[Type['VideoTorchTensor'], Type['VideoNdArray']], value: 'T' + ) -> 'T': + comp_backend = cls.get_comp_backend() + + if ( + comp_backend.n_dim(value) not in [3, 4] # type: ignore + or comp_backend.shape(value)[-1] != 3 # type: ignore + ): + raise ValueError( + f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' + f'to 3, but received {comp_backend.shape(value)}.' # type: ignore + ) + else: + return value + def save( self: 'T', file_path: Union[str, BinaryIO], diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index 05f56bf792d..5e2953b4231 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -3,7 +3,7 @@ import numpy as np from docarray.typing.tensor.torch_tensor import TorchTensor, metaTorchAndNode -from docarray.typing.tensor.video.abstract_video_tensor import AbstractVideoTensor +from docarray.typing.tensor.video.video_tensor_mixin import VideoTensorMixin T = TypeVar('T', bound='VideoTorchTensor') @@ -12,7 +12,7 @@ from pydantic.fields import ModelField -class VideoTorchTensor(AbstractVideoTensor, TorchTensor, metaclass=metaTorchAndNode): +class VideoTorchTensor(TorchTensor, VideoTensorMixin, metaclass=metaTorchAndNode): """ Subclass of TorchTensor, to represent a video tensor. Adds video-specific features to the tensor. 
@@ -31,10 +31,4 @@ def validate( config: 'BaseConfig', ) -> T: tensor = super().validate(value=value, field=field, config=config) - if tensor.ndim not in [3, 4] or tensor.shape[-1] != 3: - raise ValueError( - f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' - f'to 3, but received {tensor.shape} in {tensor.dtype}' - ) - else: - return tensor + return VideoTensorMixin.validate_shape(cls, value=tensor) From 711d1057331e9ef4ce02af23b21336953ea7411f Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 14:52:14 +0100 Subject: [PATCH 18/26] refactor: extract private load and make separate methods for frames Signed-off-by: anna-charlotte --- docarray/typing/url/video_url.py | 113 ++++++++++++++--------- tests/units/typing/url/test_video_url.py | 10 +- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 96b085ab43f..2013f22feb4 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -50,22 +50,65 @@ def validate( ) return cls(str(url), scheme=None) - def load( - self: T, only_keyframes: bool = False, **kwargs - ) -> Union[VideoNdArray, Tuple[AudioNdArray, VideoNdArray, NdArray]]: + def _load( + self: T, skip_type: str, **kwargs + ) -> Tuple[AudioNdArray, VideoNdArray, NdArray]: """ - Load the data from the url into a VideoNdArray or Tuple of AudioNdArray, - VideoNdArray and NdArray. + Load the data from the url into a Tuple of AudioNdArray, VideoNdArray and + NdArray. + + :param skip_type: determines what video frames to discard. + :param kwargs: supports all keyword arguments that are being supported by + av.open() as described in: + https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open + + :return: AudioNdArray representing the audio content, VideoNdArray representing + the images of the video, NdArray of the key frame indices. 
+ + """ + import av + + with av.open(self, **kwargs) as container: + stream = container.streams.video[0] + stream.codec_context.skip_frame = skip_type + + audio_frames = [] + video_frames = [] + keyframe_indices = [] + + for frame in container.decode( + video=0, audio=0 if skip_type != 'NONKEY' else [] + ): + if type(frame) == av.audio.frame.AudioFrame: + audio_frames.append(frame.to_ndarray()) + elif type(frame) == av.video.frame.VideoFrame: + video_frames.append(frame.to_ndarray(format='rgb24')) + + if frame.key_frame == 1: + curr_index = len(video_frames) + keyframe_indices.append(curr_index) + + if len(audio_frames) == 0: + audio = parse_obj_as(AudioNdArray, np.array(audio_frames)) + else: + audio = parse_obj_as(AudioNdArray, np.stack(audio_frames)) + + video = parse_obj_as(VideoNdArray, np.stack(video_frames)) + indices = parse_obj_as(NdArray, keyframe_indices) + + return audio, video, indices + + def load(self: T, **kwargs) -> Tuple[AudioNdArray, VideoNdArray, NdArray]: + """ + Load the data from the url into a Tuple of AudioNdArray, VideoNdArray and + NdArray. - :param only_keyframes: if True keep only the keyframes, if False return all - frames, key frame indices and audio. :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open :return: AudioNdArray representing the audio content, VideoNdArray representing - the images of the video, NdArray of key frame indices if only_keyframe - False, else only VideoNdArray representing the keyframes. + the images of the video, NdArray of the key frame indices. 
 EXAMPLE USAGE @@ -95,7 +138,21 @@ class MyDoc(BaseDocument): assert isinstance(doc.audio, AudioNdArray) assert isinstance(doc.key_frame_indices, NdArray) - You can load only the key frames: + """ + return self._load(skip_type='DEFAULT', **kwargs) + + def load_key_frames(self: T, **kwargs) -> VideoNdArray: + """ + Load the key frames of the video from the url into a VideoNdArray, + discarding all non-key frames. + + :param kwargs: supports all keyword arguments that are being supported by + av.open() as described in: + https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open + + :return: VideoNdArray representing the keyframes. + + EXAMPLE USAGE .. code-block:: python @@ -114,40 +171,10 @@ class MyDoc(BaseDocument): doc = MyDoc( video_url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true' ) - doc.video_key_frames = doc.video_url.load(only_keyframes=True) + doc.video_key_frames = doc.video_url.load_key_frames() assert isinstance(doc.video_key_frames, VideoNdArray) """ - import av - - with av.open(self, **kwargs) as container: - if only_keyframes: - stream = container.streams.video[0] - stream.codec_context.skip_frame = 'NONKEY' - - audio_frames = [] - video_frames = [] - keyframe_indices = [] - - for frame in container.decode(): - if type(frame) == av.audio.frame.AudioFrame: - audio_frames.append(frame.to_ndarray()) - elif type(frame) == av.video.frame.VideoFrame: - video_frames.append(frame.to_ndarray(format='rgb24')) - - if not only_keyframes and frame.key_frame == 1: - curr_index = len(video_frames) - keyframe_indices.append(curr_index) - - video = parse_obj_as(VideoNdArray, np.stack(video_frames)) - - if only_keyframes: - return video - else: - if len(audio_frames) == 0: - audio = parse_obj_as(AudioNdArray, np.array(audio_frames)) - else: - audio = parse_obj_as(AudioNdArray, np.stack(audio_frames)) - indices = parse_obj_as(NdArray, keyframe_indices) - return audio, video, indices + _, key_frames, _ =
self._load(skip_type='NONKEY', **kwargs) + return key_frames diff --git a/tests/units/typing/url/test_video_url.py b/tests/units/typing/url/test_video_url.py index b468160b6ee..02ae5119a59 100644 --- a/tests/units/typing/url/test_video_url.py +++ b/tests/units/typing/url/test_video_url.py @@ -26,9 +26,9 @@ 'file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], ) -def test_load_with_only_keyframes_false(file_url): +def test_load(file_url): url = parse_obj_as(VideoUrl, file_url) - audio, video, indices = url.load(only_keyframes=False) + audio, video, indices = url.load() assert isinstance(audio, np.ndarray) assert isinstance(audio, AudioNdArray) @@ -46,9 +46,9 @@ def test_load_with_only_keyframes_false(file_url): 'file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE], ) -def test_load_with_only_keyframes_true(file_url): +def test_load_key_frames(file_url): url = parse_obj_as(VideoUrl, file_url) - key_frames = url.load(only_keyframes=True) + key_frames = url.load_key_frames() assert isinstance(key_frames, np.ndarray) assert isinstance(key_frames, VideoNdArray) @@ -66,7 +66,7 @@ class MyVideoDoc(BaseDocument): tensor: Optional[VideoTorchTensor] doc = MyVideoDoc(video_url=file_url) - doc.tensor = doc.video_url.load(only_keyframes=True) + doc.tensor = doc.video_url.load_key_frames() assert isinstance(doc.tensor, torch.Tensor) assert isinstance(doc.tensor, VideoTorchTensor) From 0c9c1fdba6065a5e4b08f5fc4d0042749f5a2a11 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 15:16:50 +0100 Subject: [PATCH 19/26] fix: use torch shape instead of size method Signed-off-by: anna-charlotte --- docarray/computation/torch_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 176f887d6ae..66ba4d592f3 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -67,7 +67,7 @@ def n_dim(array: 'torch.Tensor') -> int: @staticmethod def 
shape(array: 'torch.Tensor') -> Tuple: - return array.size() + return array.shape @staticmethod def to_numpy(array: 'torch.Tensor') -> 'np.ndarray': From e3a465ce11c76029fd8071e193edb78125770610 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 17:05:47 +0100 Subject: [PATCH 20/26] fix: add typehint to shape in comp backend Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 2 +- docarray/computation/numpy_backend.py | 2 +- docarray/computation/torch_backend.py | 4 ++-- tests/units/computation_backends/numpy_backend/test_basics.py | 4 +++- tests/units/computation_backends/torch_backend/test_basics.py | 4 +++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index f5cac384e47..0c015f75d6b 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -39,7 +39,7 @@ def n_dim(array: 'TTensor') -> int: @staticmethod @abstractmethod - def shape(array: 'TTensor') -> Tuple: + def shape(array: 'TTensor') -> Tuple[int, ...]: """ Get the shape of the array. 
""" diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index b02c050aee5..5fb3135b9f1 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -65,7 +65,7 @@ def n_dim(array: 'np.ndarray') -> int: return array.ndim @staticmethod - def shape(array: 'np.ndarray') -> Tuple: + def shape(array: 'np.ndarray') -> Tuple[int, ...]: return array.shape @staticmethod diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 66ba4d592f3..df68e73c18e 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -66,8 +66,8 @@ def n_dim(array: 'torch.Tensor') -> int: return array.ndim @staticmethod - def shape(array: 'torch.Tensor') -> Tuple: - return array.shape + def shape(array: 'torch.Tensor') -> Tuple[int, ...]: + return tuple(array.shape) @staticmethod def to_numpy(array: 'torch.Tensor') -> 'np.ndarray': diff --git a/tests/units/computation_backends/numpy_backend/test_basics.py b/tests/units/computation_backends/numpy_backend/test_basics.py index 29ed9ec001a..7a20ad32f2f 100644 --- a/tests/units/computation_backends/numpy_backend/test_basics.py +++ b/tests/units/computation_backends/numpy_backend/test_basics.py @@ -31,4 +31,6 @@ def test_n_dim(array, result): ], ) def test_shape(array, result): - assert NumpyCompBackend.shape(array) == result + shape = NumpyCompBackend.shape(array) + assert shape == result + assert type(shape) == tuple diff --git a/tests/units/computation_backends/torch_backend/test_basics.py b/tests/units/computation_backends/torch_backend/test_basics.py index a98cca72a84..68afaacf212 100644 --- a/tests/units/computation_backends/torch_backend/test_basics.py +++ b/tests/units/computation_backends/torch_backend/test_basics.py @@ -33,4 +33,6 @@ def test_n_dim(array, result): ], ) def test_shape(array, result): - assert TorchCompBackend.shape(array) == result + shape = TorchCompBackend.shape(array) + 
assert shape == result + assert type(shape) == tuple From 40eac9357150e7a849b7038bf0d07b13f6215bc2 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 16 Jan 2023 17:11:42 +0100 Subject: [PATCH 21/26] docs: add supported strings for skip type Signed-off-by: anna-charlotte --- docarray/typing/url/video_url.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index 2013f22feb4..fff2dda5d18 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -57,7 +57,8 @@ def _load( Load the data from the url into a Tuple of AudioNdArray, VideoNdArray and NdArray. - :param skip_type: determines what video frames to discard. + :param skip_type: determines what video frames to discard. Supported strings + are: 'NONE', 'DEFAULT', 'NONREF', 'BIDIR', 'NONINTRA', 'NONKEY', 'ALL'. :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open From a700f308a743e7ada0f90f3bc5d97852ee5d47cd Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 17 Jan 2023 10:03:50 +0100 Subject: [PATCH 22/26] fix: apply suggestions from code review Signed-off-by: anna-charlotte --- docarray/documents/video.py | 18 +++++++++--------- .../predefined_document/test_video.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docarray/documents/video.py b/docarray/documents/video.py index 99ec9733d6e..dd011b796fc 100644 --- a/docarray/documents/video.py +++ b/docarray/documents/video.py @@ -1,8 +1,8 @@ from typing import Optional, TypeVar from docarray.base_document import BaseDocument +from docarray.documents import Audio from docarray.typing import AnyEmbedding, AnyTensor -from docarray.typing.tensor.audio.audio_tensor import AudioTensor from docarray.typing.tensor.video.video_tensor import VideoTensor from docarray.typing.url.video_url import VideoUrl @@ 
-12,10 +12,10 @@ class Video(BaseDocument): """ Document for handling video. - The Video Document can contain a VideoUrl (`Video.url`), an AudioTensor - (`Video.audio_tensor`), a VideoTensor (`Video.video_tensor`), an AnyTensor - representing the indices of the video's key frames (`Video.key_frame_indices`), - and an AnyEmbedding (`Video.embedding`). + The Video Document can contain a VideoUrl (`Video.url`), an Audio Document + (`Video.audio`), a VideoTensor (`Video.video_tensor`), an AnyTensor representing + the indices of the video's key frames (`Video.key_frame_indices`) and an + AnyEmbedding (`Video.embedding`). EXAMPLE USAGE: @@ -29,7 +29,7 @@ class Video(BaseDocument): vid = Video( url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true' ) - vid.audio_tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() + vid.audio.tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() model = MyEmbeddingModel() vid.embedding = model(vid.video_tensor) @@ -50,7 +50,7 @@ class MyVideo(Video): video = MyVideo( url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' ) - video.video_tensor = video.url.load(only_keyframes=True) + video.video_tensor = video.url.load_key_frames() model = MyEmbeddingModel() video.embedding = model(video.video_tensor) video.name = Text(text='my first video') @@ -75,11 +75,11 @@ class MultiModalDoc(BaseDocument): ), text=Text(text='hello world, how are you doing?'), ) - mmdoc.video.video_tensor = mmdoc.video.url.load(only_keyframes=True) + mmdoc.video.video_tensor = mmdoc.video.url.load_key_frames() """ url: Optional[VideoUrl] - audio_tensor: Optional[AudioTensor] + audio: Optional[Audio] = Audio() video_tensor: Optional[VideoTensor] key_frame_indices: Optional[AnyTensor] embedding: Optional[AnyEmbedding] diff --git a/tests/integrations/predefined_document/test_video.py b/tests/integrations/predefined_document/test_video.py index 
2522ba2801e..85cc451e851 100644 --- a/tests/integrations/predefined_document/test_video.py +++ b/tests/integrations/predefined_document/test_video.py @@ -13,8 +13,8 @@ @pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE]) def test_video(file_url): vid = Video(url=file_url) - vid.audio_tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() + vid.audio.tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load() - assert isinstance(vid.audio_tensor, AudioNdArray) + assert isinstance(vid.audio.tensor, AudioNdArray) assert isinstance(vid.video_tensor, VideoNdArray) assert isinstance(vid.key_frame_indices, NdArray) From 07ceae8eb85e67888e742fef66390b5e43d9c2b7 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 17 Jan 2023 11:15:17 +0100 Subject: [PATCH 23/26] fix: small change to trigger ci again Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/video_tensor_mixin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py index 01d5184f824..95ec7a9dfb5 100644 --- a/docarray/typing/tensor/video/video_tensor_mixin.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -17,7 +17,6 @@ def validate_shape( cls: Union[Type['VideoTorchTensor'], Type['VideoNdArray']], value: 'T' ) -> 'T': comp_backend = cls.get_comp_backend() - if ( comp_backend.n_dim(value) not in [3, 4] # type: ignore or comp_backend.shape(value)[-1] != 3 # type: ignore From c2e129d32973638de6344cf60ff15720130fc19c Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 17 Jan 2023 11:22:14 +0100 Subject: [PATCH 24/26] fix: extract shape var Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/video_tensor_mixin.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py index 95ec7a9dfb5..6decae2ca3f 100644 --- 
a/docarray/typing/tensor/video/video_tensor_mixin.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -17,13 +17,11 @@ def validate_shape( cls: Union[Type['VideoTorchTensor'], Type['VideoNdArray']], value: 'T' ) -> 'T': comp_backend = cls.get_comp_backend() - if ( - comp_backend.n_dim(value) not in [3, 4] # type: ignore - or comp_backend.shape(value)[-1] != 3 # type: ignore - ): + shape = comp_backend.shape(value) # type: ignore + if comp_backend.n_dim(value) not in [3, 4] or shape[-1] != 3: # type: ignore raise ValueError( f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' - f'to 3, but received {comp_backend.shape(value)}.' # type: ignore + f'to 3, but received {shape}.' ) else: return value From d50ae67fcb2b9462304a1b99d8fd622b431680e6 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 17 Jan 2023 14:42:17 +0100 Subject: [PATCH 25/26] fix: introduce compbackendinterface Signed-off-by: anna-charlotte --- docarray/typing/tensor/video/video_ndarray.py | 2 +- .../typing/tensor/video/video_tensor_mixin.py | 26 ++++++++++++------- .../typing/tensor/video/video_torch_tensor.py | 2 +- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/docarray/typing/tensor/video/video_ndarray.py b/docarray/typing/tensor/video/video_ndarray.py index 97c07afbdca..5cf6efc0057 100644 --- a/docarray/typing/tensor/video/video_ndarray.py +++ b/docarray/typing/tensor/video/video_ndarray.py @@ -31,4 +31,4 @@ def validate( config: 'BaseConfig', ) -> T: tensor = super().validate(value=value, field=field, config=config) - return VideoTensorMixin.validate_shape(cls, value=tensor) + return cls.validate_shape(value=tensor) diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py index 6decae2ca3f..ac69cd5dea6 100644 --- a/docarray/typing/tensor/video/video_tensor_mixin.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -1,3 +1,4 @@ +import abc from typing import TYPE_CHECKING, 
BinaryIO, Optional, Type, TypeVar, Union import numpy as np @@ -5,23 +6,28 @@ from docarray.typing.tensor.audio.audio_tensor import AudioTensor if TYPE_CHECKING: - from docarray.typing import VideoNdArray, VideoTorchTensor - + from docarray.typing.tensor.abstract_tensor import AbstractTensor T = TypeVar('T', bound='VideoTensorMixin') +TT = TypeVar('TT', bound='AbstractTensor') -class VideoTensorMixin: +class CompBackendInterface(abc.ABC): @staticmethod - def validate_shape( - cls: Union[Type['VideoTorchTensor'], Type['VideoNdArray']], value: 'T' - ) -> 'T': - comp_backend = cls.get_comp_backend() - shape = comp_backend.shape(value) # type: ignore - if comp_backend.n_dim(value) not in [3, 4] or shape[-1] != 3: # type: ignore + @abc.abstractmethod + def get_comp_backend(): + """The computational backend compatible with this tensor type.""" + ... + + +class VideoTensorMixin(CompBackendInterface, abc.ABC): + @classmethod + def validate_shape(cls: Type['T'], value: 'T') -> 'T': + comp_be = cls.get_comp_backend() + if comp_be.n_dim(value) not in [3, 4] or comp_be.shape(value)[-1] != 3: raise ValueError( f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' - f'to 3, but received {shape}.' + f'to 3, but received {comp_be.shape(value)}.' 
) else: return value diff --git a/docarray/typing/tensor/video/video_torch_tensor.py b/docarray/typing/tensor/video/video_torch_tensor.py index 5e2953b4231..60dce18da3f 100644 --- a/docarray/typing/tensor/video/video_torch_tensor.py +++ b/docarray/typing/tensor/video/video_torch_tensor.py @@ -31,4 +31,4 @@ def validate( config: 'BaseConfig', ) -> T: tensor = super().validate(value=value, field=field, config=config) - return VideoTensorMixin.validate_shape(cls, value=tensor) + return cls.validate_shape(value=tensor) From 2e365e6017d19f013360b76a465a06e050783803 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Tue, 17 Jan 2023 15:19:10 +0100 Subject: [PATCH 26/26] fix: revert previous pr and fix for mypy Signed-off-by: anna-charlotte --- .../typing/tensor/video/video_tensor_mixin.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py index ac69cd5dea6..1d4c2206e9d 100644 --- a/docarray/typing/tensor/video/video_tensor_mixin.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -1,33 +1,23 @@ import abc -from typing import TYPE_CHECKING, BinaryIO, Optional, Type, TypeVar, Union +from typing import BinaryIO, Optional, Type, TypeVar, Union import numpy as np +from docarray.typing.tensor.abstract_tensor import AbstractTensor from docarray.typing.tensor.audio.audio_tensor import AudioTensor -if TYPE_CHECKING: - from docarray.typing.tensor.abstract_tensor import AbstractTensor +T = TypeVar('T', bound='AbstractTensor') -T = TypeVar('T', bound='VideoTensorMixin') -TT = TypeVar('TT', bound='AbstractTensor') - -class CompBackendInterface(abc.ABC): - @staticmethod - @abc.abstractmethod - def get_comp_backend(): - """The computational backend compatible with this tensor type.""" - ... 
- - -class VideoTensorMixin(CompBackendInterface, abc.ABC): +class VideoTensorMixin(AbstractTensor, abc.ABC): @classmethod def validate_shape(cls: Type['T'], value: 'T') -> 'T': comp_be = cls.get_comp_backend() - if comp_be.n_dim(value) not in [3, 4] or comp_be.shape(value)[-1] != 3: + shape = comp_be.shape(value) # type: ignore + if comp_be.n_dim(value) not in [3, 4] or shape[-1] != 3: # type: ignore raise ValueError( f'Expects tensor with 3 or 4 dimensions and the last dimension equal ' - f'to 3, but received {comp_be.shape(value)}.' + f'to 3, but received {shape}.' ) else: return value