From 1c903d90a0c91ba7e70a1525acada96aee8dab8b Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Thu, 22 Feb 2024 19:44:36 +0530 Subject: [PATCH 1/9] feat: add text asset --- videodb/_constants.py | 32 +++++++++++++++++++++++++++++++- videodb/asset.py | 29 ++++++++++++++++++++++++++++- videodb/timeline.py | 18 +++++++++++++----- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/videodb/_constants.py b/videodb/_constants.py index 7c2d317..a960a0d 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -1,5 +1,5 @@ """Constants used in the videodb package.""" - +from typing import Union from dataclasses import dataclass VIDEO_DB_API: str = "https://api.videodb.io" @@ -103,3 +103,33 @@ class SubtitleStyle: margin_l: int = 10 margin_r: int = 10 margin_v: int = 10 + + +@dataclass +class TextStyle: + fontsize: int = 24 + fontcolor: str = "black" + fontcolor_expr: str = "" + alpha: float = 1.0 + font: str = "Sans" + ft_load_flags: str = "default" + box: bool = True + boxcolor: str = "white" + boxborderw: str = "10" + boxw: int = 0 + boxh: int = 0 + line_spacing: int = 0 + text_align: str = "T" + y_align: str = "text" + borderw: int = 0 + bordercolor: str = "black" + expansion: str = "normal" + basetime: int = 0 + fix_bounds: bool = False + text_shaping: bool = True + shadowcolor: str = "black" + shadowx: int = 0 + shadowy: int = 0 + tabsize: int = 4 + x: Union[str, int] = "(main_w-text_w)/2" + y: Union[str, int] = "(main_h-text_h)/2" diff --git a/videodb/asset.py b/videodb/asset.py index e64a103..f831fc3 100644 --- a/videodb/asset.py +++ b/videodb/asset.py @@ -1,9 +1,10 @@ import copy import logging +import uuid from typing import Optional, Union -from videodb._constants import MaxSupported +from videodb._constants import MaxSupported, TextStyle logger = logging.getLogger(__name__) @@ -117,3 +118,29 @@ def __repr__(self) -> str: f"y={self.y}, " f"duration={self.duration})" ) + + +class TextAsset(MediaAsset): + def __init__( + self, + duration: Optional[int] = None, + style: TextStyle = TextStyle(), + ) -> None: + super().__init__(f"t-{str(uuid.uuid4())}") + self.duration = duration + self.style: TextStyle = style + + def to_json(self) -> dict: + return { + "asset_id": copy.deepcopy(self.asset_id), + "duration": copy.deepcopy(self.duration), + "style": copy.deepcopy(self.style.__dict__), + } + + def __repr__(self) -> str: + return ( + f"TextAsset(" + f"asset_id={self.asset_id}, " + f"duration={self.duration}, " + f"style={self.style})" + ) diff --git a/videodb/timeline.py b/videodb/timeline.py index 96b66bf..c4b63ce 100644 --- a/videodb/timeline.py +++ b/videodb/timeline.py @@ -1,7 +1,7 @@ from typing import Union from videodb._constants import ApiPath -from videodb.asset import VideoAsset, AudioAsset, ImageAsset +from videodb.asset import VideoAsset, AudioAsset, ImageAsset, TextAsset class Timeline(object): @@ -23,14 +23,22 @@ def to_json(self) -> dict: timeline_json.append(asset.to_json()) return {"timeline": timeline_json} - def add_inline(self, asset: Union[VideoAsset]) -> None: + def add_inline(self, asset: VideoAsset) -> None: if not isinstance(asset, VideoAsset): raise ValueError("asset must be of type VideoAsset") self._timeline.append(asset) - def add_overlay(self, start: int, asset: Union[AudioAsset, ImageAsset]) -> None: - if not isinstance(asset, AudioAsset) and not isinstance(asset, ImageAsset): - raise ValueError("asset must be of type AudioAsset or ImageAsset") + def add_overlay( + self, start: int, asset: Union[AudioAsset, ImageAsset, TextAsset] + ) -> None: + if ( + not isinstance(asset, AudioAsset) + and not isinstance(asset, ImageAsset) + and not isinstance(asset, TextAsset) + ): + raise ValueError( + "asset must be of type AudioAsset, ImageAsset or TextAsset" + ) self._timeline.append((start, asset)) def generate_stream(self) -> str: From 620ba99aa07314f1e1a166b0f10b46168a2f731d Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Sun, 25 Feb 2024 21:56:52 +0530 Subject: [PATCH 2/9] feat: add scene index and search --- videodb/__init__.py | 2 ++ videodb/_constants.py | 9 +++++++ videodb/search.py | 39 ++++++++++++++++++++++++++- videodb/video.py | 62 +++++++++++++++++++++++++++++++++++++------ 4 files changed, 103 insertions(+), 9 deletions(-) diff --git a/videodb/__init__.py b/videodb/__init__.py index 2fd4d95..2141255 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -8,6 +8,7 @@ from videodb._constants import ( VIDEO_DB_API, MediaType, + SceneModels, SearchType, SubtitleAlignment, SubtitleBorderStyle, @@ -37,6 +38,7 @@ "SubtitleAlignment", "SubtitleBorderStyle", "SubtitleStyle", + "SceneModels", ] diff --git a/videodb/_constants.py b/videodb/_constants.py index 7c2d317..895eb0e 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -14,10 +14,18 @@ class MediaType: class SearchType: semantic = "semantic" keyword = "keyword" + scene = "scene" class IndexType: semantic = "semantic" + scene = "scene" + + +class SceneModels: + gemini_vision: str = "gemini-vision" + gpt4_vision: str = "gpt4-v" + all: str = "all" class Workflows: @@ -44,6 +52,7 @@ class ApiPath: compile = "compile" workflow = "workflow" timeline = "timeline" + delete = "delete" class Status: diff --git a/videodb/search.py b/videodb/search.py index 49db816..2e2aca3 100644 --- a/videodb/search.py +++ b/videodb/search.py @@ -112,6 +112,7 @@ def search_inside_video( result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, + **kwargs, ): search_data = self._connection.post( path=f"{ApiPath.video}/{video_id}/{ApiPath.search}", @@ -133,6 +134,7 @@ def search_inside_collection( result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, + **kwargs, ): search_data = self._connection.post( path=f"{ApiPath.collection}/{collection_id}/{ApiPath.search}", @@ -176,7 +178,42 @@ def search_inside_collection(**kwargs): raise NotImplementedError("Keyword search will be implemented in the future") -search_type = {SearchType.semantic: SemanticSearch, SearchType.keyword: KeywordSearch} +class SceneSearch(Search): + def __init__(self, _connection): + self._connection = _connection + + def search_inside_video( + self, + video_id: str, + query: str, + scene_model: Optional[str] = None, + result_threshold: Optional[int] = None, + score_threshold: Optional[int] = None, + dynamic_score_percentage: Optional[int] = None, + **kwargs, + ): + search_data = self._connection.post( + path=f"{ApiPath.video}/{video_id}/{ApiPath.search}", + data={ + "index_type": SearchType.scene, + "query": query, + "model_name": scene_model, + "score_threshold": score_threshold, + "result_threshold": result_threshold, + }, + ) + print(search_data) + return SearchResult(self._connection, **search_data) + + def search_inside_collection(**kwargs): + raise NotImplementedError("Scene search will be implemented in the future") + + +search_type = { + SearchType.semantic: SemanticSearch, + SearchType.keyword: KeywordSearch, + SearchType.scene: SceneSearch, +} class SearchFactory: diff --git a/videodb/video.py b/videodb/video.py index 6e8f3dc..22d9bf4 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -1,11 +1,12 @@ -from typing import Optional +from typing import Optional, Union from videodb._utils._video import play_stream from videodb._constants import ( ApiPath, - SearchType, IndexType, - Workflows, + SceneModels, + SearchType, SubtitleStyle, + Workflows, ) from videodb.search import SearchFactory, SearchResult from videodb.shot import Shot @@ -24,6 +25,7 @@ def __init__(self, _connection, id: str, collection_id: str, **kwargs) -> None: self.length = float(kwargs.get("length", 0.0)) self.transcript = kwargs.get("transcript", None) self.transcript_text = kwargs.get("transcript_text", None) + self.scenes = kwargs.get("scenes", None) def __repr__(self) -> str: return ( @@ -45,17 +47,19 @@ def search( self, query: str, search_type: Optional[str] = SearchType.semantic, + scene_model: Optional[str] = SceneModels.gemini_vision, result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, ) -> SearchResult: search = SearchFactory(self._connection).get_search(search_type) return search.search_inside_video( - self.id, - query, - result_threshold, - score_threshold, - dynamic_score_percentage, + video_id=self.id, + query=query, + result_threshold=result_threshold, + score_threshold=score_threshold, + dynamic_score_percentage=dynamic_score_percentage, + scene_model=scene_model, ) def delete(self) -> None: @@ -130,6 +134,48 @@ def index_spoken_words(self) -> None: }, ) + def index_scenes( + self, + scene_model: str = SceneModels.gemini_vision, + force: bool = False, + prompt: str = None, + callback_url: str = None, + ) -> None: + self._connection.post( + path=f"{ApiPath.video}/{self.id}/{ApiPath.index}", + data={ + "index_type": IndexType.scene, + "model_name": scene_model, + "force": force, + "prompt": prompt, + "callback_url": callback_url, + }, + ) + + def get_scenes( + self, scene_model: str = SceneModels.gemini_vision + ) -> Union[list, None]: + if self.scenes: + return self.scenes + scene_data = self._connection.get( + path=f"{ApiPath.video}/{self.id}/{ApiPath.index}", + params={ + "index_type": IndexType.scene, + "model_name": scene_model, + }, + ) + self.scenes = scene_data + return scene_data if scene_data else None + + def delete_scene_index(self, scene_model: str = SceneModels.gemini_vision) -> None: + self._connection.post( + path=f"{ApiPath.video}/{self.id}/{ApiPath.index}/{ApiPath.delete}", + data={ + "index_type": IndexType.scene, + "model_name": scene_model, + }, + ) + def add_subtitle(self, style: SubtitleStyle = SubtitleStyle()) -> str: if not isinstance(style, SubtitleStyle): raise ValueError("style must be of type SubtitleStyle") From a144df1fd508ab8ce7146938e3e5e29c51fda89d Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Sun, 25 Feb 2024 22:10:11 +0530 Subject: [PATCH 3/9] fix: collection search. --- videodb/collection.py | 14 +++++++------- videodb/search.py | 5 ++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/videodb/collection.py b/videodb/collection.py index 489a56b..b70a716 100644 --- a/videodb/collection.py +++ b/videodb/collection.py @@ -69,18 +69,18 @@ def delete_image(self, image_id: str) -> None: def search( self, query: str, - type: Optional[str] = SearchType.semantic, + search_type: Optional[str] = SearchType.semantic, result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, ) -> SearchResult: - search = SearchFactory(self._connection).get_search(type) + search = SearchFactory(self._connection).get_search(search_type) return search.search_inside_collection( - self.id, - query, - result_threshold, - score_threshold, - dynamic_score_percentage, + collection_id=self.id, + query=query, + result_threshold=result_threshold, + score_threshold=score_threshold, + dynamic_score_percentage=dynamic_score_percentage, ) def upload( diff --git a/videodb/search.py b/videodb/search.py index 2e2aca3..fcba062 100644 --- a/videodb/search.py +++ b/videodb/search.py @@ -174,7 +174,7 @@ def search_inside_video( ) return SearchResult(self._connection, **search_data) - def search_inside_collection(**kwargs): + def search_inside_collection(self, **kwargs): raise NotImplementedError("Keyword search will be implemented in the future") @@ -202,10 +202,9 @@ def search_inside_video( "result_threshold": result_threshold, }, ) - print(search_data) return SearchResult(self._connection, **search_data) - def search_inside_collection(**kwargs): + def search_inside_collection(self, **kwargs): raise NotImplementedError("Scene search will be implemented in the future") From b1680548f5606745be216b8e50150633f13017c7 Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 26 Feb 2024 17:28:57 +0530 Subject: [PATCH 4/9] fix: delete scenes --- videodb/video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/videodb/video.py b/videodb/video.py index 22d9bf4..121549d 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -175,6 +175,7 @@ def delete_scene_index(self, scene_model: str = SceneModels.gemini_vision) -> No "model_name": scene_model, }, ) + self.scenes = None def add_subtitle(self, style: SubtitleStyle = SubtitleStyle()) -> str: if not isinstance(style, SubtitleStyle): From 29e8c4f2b8c773e29d3f709645296b5c2e9bde82 Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Tue, 27 Feb 2024 19:01:01 +0530 Subject: [PATCH 5/9] fix: text asset --- videodb/__init__.py | 2 ++ videodb/asset.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/videodb/__init__.py b/videodb/__init__.py index 54abd12..ba82645 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -12,6 +12,7 @@ SubtitleAlignment, SubtitleBorderStyle, SubtitleStyle, + TextStyle, ) from videodb.client import Connection from videodb.exceptions import ( @@ -37,6 +38,7 @@ "SubtitleAlignment", "SubtitleBorderStyle", "SubtitleStyle", + "TextStyle", ] diff --git a/videodb/asset.py b/videodb/asset.py index f831fc3..73fd8e7 100644 --- a/videodb/asset.py +++ b/videodb/asset.py @@ -123,15 +123,18 @@ def __repr__(self) -> str: class TextAsset(MediaAsset): def __init__( self, + text: str, duration: Optional[int] = None, style: TextStyle = TextStyle(), ) -> None: - super().__init__(f"t-{str(uuid.uuid4())}") + super().__init__(f"txt-{str(uuid.uuid4())}") + self.text = text self.duration = duration self.style: TextStyle = style def to_json(self) -> dict: return { + "text": copy.deepcopy(self.text), "asset_id": copy.deepcopy(self.asset_id), "duration": copy.deepcopy(self.duration), "style": copy.deepcopy(self.style.__dict__), @@ -140,6 +143,7 @@ def to_json(self) -> dict: def __repr__(self) -> str: return ( f"TextAsset(" + f"text={self.text}, " f"asset_id={self.asset_id}, " f"duration={self.duration}, " f"style={self.style})" From 962e4d4a5c546f2f0713cfca8bd8590b10461b60 Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 4 Mar 2024 16:26:56 +0530 Subject: [PATCH 6/9] fix: remove scenemodels --- videodb/__init__.py | 2 -- videodb/_constants.py | 6 ------ videodb/_utils/_http_client.py | 4 ++-- videodb/search.py | 2 -- videodb/video.py | 13 ++----------- 5 files changed, 4 insertions(+), 23 deletions(-) diff --git a/videodb/__init__.py b/videodb/__init__.py index 2141255..2fd4d95 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -8,7 +8,6 @@ from videodb._constants import ( VIDEO_DB_API, MediaType, - SceneModels, SearchType, SubtitleAlignment, SubtitleBorderStyle, @@ -38,7 +37,6 @@ "SubtitleAlignment", "SubtitleBorderStyle", "SubtitleStyle", - "SceneModels", ] diff --git a/videodb/_constants.py b/videodb/_constants.py index 895eb0e..3befbc3 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -22,12 +22,6 @@ class IndexType: scene = "scene" -class SceneModels: - gemini_vision: str = "gemini-vision" - gpt4_vision: str = "gpt4-v" - all: str = "all" - - class Workflows: add_subtitles = "add_subtitles" diff --git a/videodb/_utils/_http_client.py b/videodb/_utils/_http_client.py index ae8f0d0..4555411 100644 --- a/videodb/_utils/_http_client.py +++ b/videodb/_utils/_http_client.py @@ -133,7 +133,7 @@ def _get_output(self, url: str): response_json.get("status") == Status.in_progress or response_json.get("status") == Status.processing ): - percentage = response_json.get("data").get("percentage") + percentage = response_json.get("data", {}).get("percentage") if percentage and self.show_progress and self.progress_bar: self.progress_bar.n = int(percentage) self.progress_bar.update(0) @@ -169,7 +169,7 @@ def _parse_response(self, response: requests.Response): bar_format="{l_bar}{bar:100}{r_bar}{bar:-100b}", ) response_json = self._get_output( - response_json.get("data").get("output_url") + response_json.get("data", {}).get("output_url") ) if response_json.get("success"): return response_json.get("data") diff --git a/videodb/search.py b/videodb/search.py index fcba062..81ff27b 100644 --- a/videodb/search.py +++ b/videodb/search.py @@ -186,7 +186,6 @@ def search_inside_video( self, video_id: str, query: str, - scene_model: Optional[str] = None, result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, @@ -197,7 +196,6 @@ def search_inside_video( data={ "index_type": SearchType.scene, "query": query, - "model_name": scene_model, "score_threshold": score_threshold, "result_threshold": result_threshold, }, diff --git a/videodb/video.py b/videodb/video.py index 121549d..faf78d0 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -3,7 +3,6 @@ from videodb._constants import ( ApiPath, IndexType, - SceneModels, SearchType, SubtitleStyle, Workflows, @@ -47,7 +46,6 @@ def search( self, query: str, search_type: Optional[str] = SearchType.semantic, - scene_model: Optional[str] = SceneModels.gemini_vision, result_threshold: Optional[int] = None, score_threshold: Optional[int] = None, dynamic_score_percentage: Optional[int] = None, @@ -59,7 +57,6 @@ def search( result_threshold=result_threshold, score_threshold=score_threshold, dynamic_score_percentage=dynamic_score_percentage, - scene_model=scene_model, ) def delete(self) -> None: @@ -136,7 +133,6 @@ def index_spoken_words(self) -> None: def index_scenes( self, - scene_model: str = SceneModels.gemini_vision, force: bool = False, prompt: str = None, callback_url: str = None, @@ -145,34 +141,29 @@ def index_scenes( path=f"{ApiPath.video}/{self.id}/{ApiPath.index}", data={ "index_type": IndexType.scene, - "model_name": scene_model, "force": force, "prompt": prompt, "callback_url": callback_url, }, ) - def get_scenes( - self, scene_model: str = SceneModels.gemini_vision - ) -> Union[list, None]: + def get_scenes(self) -> Union[list, None]: if self.scenes: return self.scenes scene_data = self._connection.get( path=f"{ApiPath.video}/{self.id}/{ApiPath.index}", params={ "index_type": IndexType.scene, - "model_name": scene_model, }, ) self.scenes = scene_data return scene_data if scene_data else None - def delete_scene_index(self, scene_model: str = SceneModels.gemini_vision) -> None: + def delete_scene_index(self) -> None: self._connection.post( path=f"{ApiPath.video}/{self.id}/{ApiPath.index}/{ApiPath.delete}", data={ "index_type": IndexType.scene, - "model_name": scene_model, }, ) self.scenes = None From dc108c425f3f61b952c894a5526634b2b83d375a Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 4 Mar 2024 17:12:01 +0530 Subject: [PATCH 7/9] fix: linter --- videodb/video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videodb/video.py b/videodb/video.py index 1f7d7d7..a2bcbab 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Dict, Tuple +from typing import Optional, Union, List, Dict, Tuple from videodb._utils._video import play_stream from videodb._constants import ( ApiPath, From 89009c802fd2605b8e3f2bec2148d2866bd4403a Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 4 Mar 2024 17:19:28 +0530 Subject: [PATCH 8/9] fix: remove text style option --- videodb/_constants.py | 1 - videodb/asset.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/videodb/_constants.py b/videodb/_constants.py index ff8f453..50fe068 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -115,7 +115,6 @@ class TextStyle: fontcolor_expr: str = "" alpha: float = 1.0 font: str = "Sans" - ft_load_flags: str = "default" box: bool = True boxcolor: str = "white" boxborderw: str = "10" diff --git a/videodb/asset.py b/videodb/asset.py index 73fd8e7..6061b4b 100644 --- a/videodb/asset.py +++ b/videodb/asset.py @@ -33,8 +33,8 @@ class VideoAsset(MediaAsset): def __init__( self, asset_id: str, - start: Optional[int] = 0, - end: Optional[Union[int, None]] = None, + start: Optional[float] = 0, + end: Optional[float] = None, ) -> None: super().__init__(asset_id) self.start: int = start @@ -56,8 +56,8 @@ class AudioAsset(MediaAsset): def __init__( self, asset_id: str, - start: Optional[int] = 0, - end: Optional[Union[int, None]] = None, + start: Optional[float] = 0, + end: Optional[float] = None, disable_other_tracks: Optional[bool] = True, fade_in_duration: Optional[Union[int, float]] = 0, fade_out_duration: Optional[Union[int, float]] = 0, From 20b9c6730b442500337f2fcdcbc2ce69e9792ced Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 4 Mar 2024 17:20:21 +0530 Subject: [PATCH 9/9] build: version upgrade --- videodb/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/videodb/__init__.py b/videodb/__init__.py index 58f305a..51225eb 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -24,7 +24,7 @@ logger: logging.Logger = logging.getLogger("videodb") -__version__ = "0.0.5" +__version__ = "0.1.0" __author__ = "videodb" __all__ = [