u2d-ai
diff --git a/docs/release-notes.md
Lines changed: 4 additions & 0 deletions b/docs/release-notes.md
Lines changed: 4 additions & 0 deletions
diff --git a/msaDocModels/__init__.py
Lines changed: 1 addition & 1 deletion b/msaDocModels/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/msaDocModels/sdu.py
Lines changed: 151 additions & 0 deletions b/msaDocModels/sdu.py
Lines changed: 151 additions & 0 deletions
diff --git a/pyproject.toml
Lines changed: 1 addition & 1 deletion b/pyproject.toml
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,10 @@
 # msaDocModels Release Notes
 ## Possible future features:
 
+# 0.0.114
+
+- add models for ExtractorUniversal
+
 # 0.0.113
 
 - add ProcessingDocumentsInput model
 
@@ -1,7 +1,7 @@
 import glob
 from os.path import basename, dirname, isfile, join
 
-version = "0.0.113"
+version = "0.0.114"
 __author__ = "Stefan Welcker"
 __copyright__ = "Copyright 2022, U2D.ai"
 __license__ = "MIT"
 
@@ -7697,3 +7697,154 @@ class UnzipInputModel(BaseModel):
 
     archive_path: str
     unpack_nested: Optional[bool] = False
+
+
+class ExtractorUniversalBaseInput(BaseModel):
+    """
+    Base input model for the universal entity extraction.
+
+    Attributes:
+
+        entity_type: A dictionary mapping each entity type to a list of its values.
+        max_new_tokens: The maximum number of new tokens to generate. Defaults to 256.
+    """
+
+    entity_type: Dict[str, List[str]]
+    max_new_tokens: int = 256
+
+
+class ExtractorUniversalDocumentInput(ExtractorUniversalBaseInput, BaseDocumentInput):
+    """
+    Input model for document-level entity extraction.
+
+    Attributes:
+
+        result_output: The type of result to output (e.g., 'document', 'pages', 'paragraphs', 'sentences').
+            Defaults to ResultType.document.
+    """
+
+    result_output: ResultType = ResultType.document
+
+
+class ExtractorUniversalTextInput(ExtractorUniversalBaseInput):
+    """
+    Input model for text-level entity extraction.
+
+    Attributes:
+
+        input_text: The input text or texts to extract entities from.
+    """
+
+    input_text: Union[str, List[str], Dict[Any, str]]
+
+
+class ExtractorUniversalEntity(BaseModel):
+    """
+    Model representing an extracted entity.
+
+    Attributes:
+
+        result: The extracted entity text.
+        positions: The positions of the extracted entity in the input text.
+    """
+
+    result: str
+    positions: List[Dict[str, int]]
+
+
+class ExtractorUniversalDTO(BaseModel):
+    """
+    Model that represents the result from universal entity extraction.
+
+    Attributes:
+
+        prediction: The extracted entities organized by entity type, given as a single
+            mapping, a list of such mappings, or a dictionary of such mappings.
+    """
+
+    prediction: Union[
+        Dict[str, List[ExtractorUniversalEntity]],
+        List[Dict[str, List[ExtractorUniversalEntity]]],
+        Dict[str, Dict[str, List[ExtractorUniversalEntity]]],
+    ]
+
+
+class ExtractorUniversalPageResult(NestingId):
+    """
+    Model representing the result of entity extraction for a page.
+
+    Attributes:
+
+        result: The extracted entities organized by entity type.
+    """
+
+    result: Dict[str, List[ExtractorUniversalEntity]]
+
+
+class ExtractorUniversalSentenceResult(NestingId):
+    """
+    Model representing the result of entity extraction for a sentence.
+
+    Attributes:
+
+        result: The extracted entities organized by entity type.
+    """
+
+    result: Dict[str, List[ExtractorUniversalEntity]]
+
+
+class ExtractorUniversalParagraphSentences(NestingId):
+    """
+    Model representing the result of entity extraction for a paragraph, broken down by sentence.
+
+    Attributes:
+
+        sentences: List of sentence-level extraction results.
+    """
+
+    sentences: List[ExtractorUniversalSentenceResult]
+
+
+class ExtractorUniversalParagraphResult(NestingId):
+    """
+    Model representing the result of entity extraction for a paragraph.
+
+    Attributes:
+
+        result: The extracted entities organized by entity type.
+    """
+
+    result: Dict[str, List[ExtractorUniversalEntity]]
+
+
+class ExtractorUniversalPageParagraphs(NestingId):
+    """
+    Model representing the result of entity extraction for a page, broken down by paragraph.
+
+    Attributes:
+
+        paragraphs: List of paragraph-level extraction results, or of paragraphs
+            holding sentence-level extraction results.
+    """
+
+    paragraphs: Union[
+        List[ExtractorUniversalParagraphResult],
+        List[ExtractorUniversalParagraphSentences],
+    ]
+
+
+class ExtractorUniversalDocumentDTO(BaseModel):
+    """
+    Model representing the result of universal entity extraction for a document.
+
+    Attributes:
+
+        extractor_universal: The extracted entities organized by entity type at various levels
+                            (document, page, paragraph, sentence).
+    """
+
+    extractor_universal: Union[
+        Dict[str, List[ExtractorUniversalEntity]],
+        List[ExtractorUniversalPageResult],
+        List[ExtractorUniversalPageParagraphs],
+    ]
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "msaDocModels"
-version = "0.0.113"
+version = "0.0.114"
 description = "MSA Document Pydantic Models and Schemas, used to store Parser, NLP, NLU and AI results for processed documents"
 authors = ["Stefan Welcker"]
 readme = "README.md"