10000 feat(v2): add video support by anna-charlotte · Pull Request #972 · docarray/docarray · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a452268
feat: add video url and tensors to proto
Jan 3, 2023
3ccb697
feat: add video url and video ndarray
Jan 3, 2023
dc957d1
feat: add video torch tensor and tests
Jan 4, 2023
fc86920
fix: mypy checks
Jan 4, 2023
8a55e0b
chore: add av to video extra
Jan 4, 2023
5cb098a
fix: allow dim 3
Jan 4, 2023
3ba1f78
test: wip video load and save
Jan 5, 2023
be63926
refactor: move to numpy to computational backend
Jan 6, 2023
395a495
fix: video load and save
Jan 11, 2023
406ec80
test: adjust tests
Jan 11, 2023
091e79a
fix: video load and save and add docstrings
Jan 11, 2023
dee1146
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
e4106a8
fix: fix some imports after merging
Jan 11, 2023
23ee930
docs: add doc strings and fix example urls
Jan 11, 2023
7ab8dbd
docs: small fixes in docs
Jan 11, 2023
ecf01d8
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
5295dd1
refactor: rename save to mp4 file to save
Jan 11, 2023
b3f2ccb
feat: add shape method to comp backend
Jan 16, 2023
20ecf2c
refactor: move validate shape to video tensor mixin
Jan 16, 2023
711d105
refactor: extract private load and make separate methods for frames
Jan 16, 2023
0c9c1fd
fix: use torch shape instead of size method
Jan 16, 2023
e3a465c
fix: add typehint to shape in comp backend
Jan 16, 2023
40eac93
docs: add supported strings for skip type
Jan 16, 2023
a700f30
fix: apply suggestions from code review
Jan 17, 2023
94572fd
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
07ceae8
fix: small change to trigger ci again
Jan 17, 2023
c2e129d
fix: extract shape var
Jan 17, 2023
d50ae67
fix: introduce compbackendinterface
Jan 17, 2023
2e365e6
fix: revert previous pr and fix for mypy
Jan 17, 2023
c44a035
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
95b0b81
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion docarray/computation/abstract_comp_backend.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import typing
from abc import ABC, abstractmethod
from typing import List, Optional, Tuple, TypeVar, Union, overload
from typing import TYPE_CHECKING, List, Optional, Tuple, TypeVar, Union, overload

if TYPE_CHECKING:
import numpy as np

# In practice all of the below will be the same type
TTensor = TypeVar('TTensor')
Expand Down Expand Up @@ -30,6 +33,17 @@ def stack(
@staticmethod
@abstractmethod
def n_dim(array: 'TTensor') -> int:
"""
Get the number of dimensions of the array.
"""
...

@staticmethod
@abstractmethod
def to_numpy(array: 'TTensor') -> 'np.ndarray':
"""
Convert array to np.ndarray.
"""
...

@staticmethod
Expand Down
4 changes: 4 additions & 0 deletions docarray/computation/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def to_device(
def n_dim(array: 'np.ndarray') -> int:
return array.ndim

@staticmethod
def to_numpy(array: 'np.ndarray') -> 'np.ndarray':
return array

@staticmethod
def empty(shape: Tuple[int, ...]) -> 'np.ndarray':
return np.empty(shape)
Expand Down
5 changes: 5 additions & 0 deletions docarray/computation/torch_backend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, overload

import numpy as np
import torch

from docarray.computation.abstract_comp_backend import AbstractComputationalBackend
Expand Down Expand Up @@ -68,6 +69,10 @@ def empty(shape: Tuple[int, ...]) -> torch.Tensor:
def n_dim(array: 'torch.Tensor') -> int:
return array.ndim

@staticmethod
def to_numpy(array: 'torch.Tensor') -> 'np.ndarray':
return array.cpu().detach().numpy()

@staticmethod
def none_value() -> Any:
"""Provide a compatible value that represents None in torch."""
Expand Down
3 changes: 2 additions & 1 deletion docarray/documents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from docarray.documents.mesh import Mesh3D
from docarray.documents.point_cloud import PointCloud3D
from docarray.documents.text import Text
from docarray.documents.video import Video

__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D']
__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D', 'Video']
8 changes: 4 additions & 4 deletions docarray/documents/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Audio(BaseDocument):

# use it directly
audio = Audio(
url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
)
audio.tensor = audio.url.load()
model = MyEmbeddingModel()
Expand All @@ -43,12 +43,12 @@ class MyAudio(Audio):


audio = MyAudio(
url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
)
audio.tensor = audio.url.load()
model = MyEmbeddingModel()
audio.embedding = model(audio.tensor)
audio.name = 'my first audio'
audio.name = Text(text='my first audio')


You can use this Document for composition:
Expand All @@ -66,7 +66,7 @@ class MultiModalDoc(Document):

mmdoc = MultiModalDoc(
audio=Audio(
url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
),
text=Text(text='hello world, how are you doing?'),
)
Expand Down
85 changes: 85 additions & 0 deletions docarray/documents/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from typing import Optional, TypeVar

from docarray.base_document import BaseDocument
from docarray.documents import Audio
from docarray.typing import AnyEmbedding, AnyTensor
from docarray.typing.tensor.video.video_tensor import VideoTensor
from docarray.typing.url.video_url import VideoUrl

T = TypeVar('T', bound='Video')


class Video(BaseDocument):
"""
Document for handling video.
The Video Document can contain a VideoUrl (`Video.url`), an Audio Document
(`Video.audio`), a VideoTensor (`Video.video_tensor`), an AnyTensor representing
the indices of the video's key frames (`Video.key_frame_indices`) and an
AnyEmbedding (`Video.embedding`).

EXAMPLE USAGE:

You can use this Document directly:

.. code-block:: python

from docarray.documents import Video

# use it directly
vid = Video(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true'
)
vid.audio.tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load()
model = MyEmbeddingModel()
vid.embedding = model(vid.video_tensor)

You can extend this Document:

.. code-block:: python

from typing import Optional

from docarray.documents import Text, Video


# extend it
class MyVideo(Video):
name: Optional[Text]


video = MyVideo(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true'
)
video.video_tensor = video.url.load_key_frames()
model = MyEmbeddingModel()
video.embedding = model(video.video_tensor)
video.name = Text(text='my first video')

You can use this Document for composition:

.. code-block:: python

from docarray import BaseDocument
from docarray.documents import Text, Video


# compose it
class MultiModalDoc(BaseDocument):
video: Video
text: Text


mmdoc = MultiModalDoc(
video=Video(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true'
),
text=Text(text='hello world, how are you doing?'),
)
mmdoc.video.video_tensor = mmdoc.video.url.load_key_frames()
"""

url: Optional[VideoUrl]
audio: Optional[Audio] = Audio()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use None as the default value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VideoUrl.load() returns an AudioNdArray (along with the video tensor and the key frame indices), which can't be assigned to video.audio.tensor if video.audio is None.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay makes sense

video_tensor: Optional[VideoTensor]
key_frame_indices: Optional[AnyTensor]
embedding: Optional[AnyEmbedding]
6 changes: 6 additions & 0 deletions docarray/proto/docarray.proto
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ message NodeProto {

NdArrayProto audio_torch_tensor = 16;

string video_url = 17;

NdArrayProto video_ndarray = 18;

NdArrayProto video_torch_tensor = 19;

}

}
Expand Down
28 changes: 14 additions & 14 deletions docarray/proto/pb2/docarray_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions docarray/typing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,28 @@
from docarray.typing.tensor.embedding.embedding import AnyEmbedding
from docarray.typing.tensor.ndarray import NdArray
from docarray.typing.tensor.tensor import AnyTensor
from docarray.typing.tensor.video import VideoNdArray
from docarray.typing.url import (
AnyUrl,
AudioUrl,
ImageUrl,
Mesh3DUrl,
PointCloud3DUrl,
TextUrl,
VideoUrl,
)

__all__ = [
'AudioNdArray',
'NdArray',
'AudioNdArray',
'VideoNdArray',
'AnyEmbedding',
'ImageUrl',
'AudioUrl',
'TextUrl',
'Mesh3DUrl',
'PointCloud3DUrl',
'VideoUrl',
'AnyUrl',
'ID',
'AnyTensor',
Expand All @@ -33,5 +37,8 @@
else:
from docarray.typing.tensor import TorchEmbedding, TorchTensor # noqa: F401
from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor # noqa
from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor # noqa

__all__.extend(['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor'])
__all__.extend(
['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor', 'VideoTorchTensor']
)
12 changes: 12 additions & 0 deletions docarray/typing/tensor/video/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from docarray.typing.tensor.video.video_ndarray import VideoNdArray

__all__ = ['VideoNdArray']

try:
import torch # noqa: F401
except ImportError:
pass
else:
from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor # noqa

__all__.extend(['VideoTorchTensor'])
34 changes: 34 additions & 0 deletions docarray/typing/tensor/video/video_ndarray.py