docarray · JoanFM · Jan 27, 2023 · Jan 25, 2023 · Jan 25, 2023 · Jan 25, 2023
diff --git a/docarray/base_document/document.py b/docarray/base_document/document.py
@@ -43,3 +43,6 @@ def __str__(self):
             _console.print(self)
 
         return capture.get().strip()
+
+    def _get_string_for_regex_filter(self):
+        return str(self)
diff --git a/docarray/documents/text.py b/docarray/documents/text.py
@@ -65,6 +65,22 @@ class MultiModalDoc(BaseDocument):
             text_doc=Text(text="hello world, how are you doing?"),
         )
         mmdoc.text_doc.text = mmdoc.text_doc.url.load()
+
+    This Document can be compared against another Document of the same type or a string.
+    When compared against another object of the same type, the pydantic BaseModel
+    equality check will apply which checks the equality of every attribute,
+    including `id`. When compared against a str, it will check the equality
+    of the `text` attribute against the given string.
+
+    .. code-block:: python
+
+        from docarray.documents import Text
+
+        doc = Text(text='This is the main text', url='exampleurl.com')
+        doc2 = Text(text='This is the main text', url='exampleurl.com')
+
+        doc == 'This is the main text' # True
+        doc == doc2 # False, their ids are not equivalent
     """
 
     text: Optional[str] = None
@@ -79,3 +95,33 @@ def validate(
         if isinstance(value, str):
             value = cls(text=value)
         return super().validate(value)
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str):
+            return self.text == other
+        else:
+            # BaseModel has a default equality
+            return super().__eq__(other)
+
+    def __contains__(self, item: str) -> bool:
+        """
+        This method makes the `in` operator on `Text` behave the same as on a `str`.
+
+        .. code-block:: python
+
+            from docarray.documents import Text
+
+            t = Text(text='this is my text document')
+            assert 'text' in t
+            assert 'docarray' not in t
+
+        :param item: A string to look for as a substring of the `text` attribute
+        :return: A boolean determining the presence of `item` as a substring in `text`
+        """
+        if self.text is not None:
+            return self.text.__contains__(item)
+        else:
+            return False
+
+    def _get_string_for_regex_filter(self):
+        return self.text
diff --git a/docarray/utils/filter.py b/docarray/utils/filter.py
@@ -0,0 +1,66 @@
+import json
+
+from typing import Union, Dict, List
+
+
+from docarray.array.abstract_array import AnyDocumentArray
+from docarray.array.array import DocumentArray
+
+
+def filter(
+    docs: AnyDocumentArray,
+    query: Union[str, Dict, List[Dict]],
+) -> AnyDocumentArray:
+    """
+    Filter the Documents in `docs` according to the given filter query.
+
+
+    EXAMPLE USAGE
+
+    .. code-block:: python
+
+        from docarray import DocumentArray, BaseDocument
+        from docarray.documents import Text, Image
+        from docarray.utils.filter import filter
+
+
+        class MyDocument(BaseDocument):
+            caption: Text
+            image: Image
+            price: int
+
+
+        docs = DocumentArray[MyDocument](
+            [MyDocument(caption='A tiger in the jungle',
+            image=Image(url='tigerphoto.png'), price=100),
+            MyDocument(caption='A swimming turtle',
+            image=Image(url='turtlepic.png'), price=50),
+            MyDocument(caption='A couple birdwatching with binoculars',
+            image=Image(url='binocularsphoto.png'), price=30)]
+        )
+        query = {
+            '$and': {
+                'image.url': {'$regex': 'photo'},
+                'price': {'$lte': 50},
+            }
+        }
+
+        results = filter(docs, query)
+        assert len(results) == 1
+        assert results[0].price == 30
+        assert results[0].caption == 'A couple birdwatching with binoculars'
+        assert results[0].image.url == 'binocularsphoto.png'
+
+    :param docs: the DocumentArray where to apply the filter
+    :param query: the query to filter by, given as a dict, a list of dicts, or a JSON string
+    :return: A DocumentArray containing the Documents
+    in `docs` that fulfill the filter conditions in the `query`
+    """
+    from docarray.utils.query_language.query_parser import QueryParser
+
+    if query:
+        query = query if not isinstance(query, str) else json.loads(query)
+        parser = QueryParser(query)
+        return DocumentArray(d for d in docs if parser.evaluate(d))
+    else:
+        return docs
diff --git a/docarray/utils/query_language/__init__.py b/docarray/utils/query_language/__init__.py