@@ -7697,3 +7697,154 @@ class UnzipInputModel(BaseModel):
7697
7697
7698
7698
archive_path : str
7699
7699
unpack_nested : Optional [bool ] = False
7700
+
7701
+
7702
class ExtractorUniversalBaseInput(BaseModel):
    """
    Shared request fields for universal entity extraction.

    Attributes:

        entity_type: Dictionary keyed by entity type; each key maps to a list
            of string values for that type.
        max_new_tokens: Upper bound on the number of new tokens to generate.
            Defaults to 256.
    """

    entity_type: Dict[str, List[str]]
    max_new_tokens: int = 256
7714
+
7715
+
7716
class ExtractorUniversalDocumentInput(ExtractorUniversalBaseInput, BaseDocumentInput):
    """
    Request model for running universal entity extraction on a document.

    Combines the fields of ExtractorUniversalBaseInput with those of
    BaseDocumentInput (declared elsewhere in this module).

    Attributes:

        result_output: Granularity of the returned result (e.g., 'document',
            'pages', 'paragraphs', 'sentences'). Defaults to
            ResultType.document.
    """

    result_output: ResultType = ResultType.document
7727
+
7728
+
7729
class ExtractorUniversalTextInput(ExtractorUniversalBaseInput):
    """
    Request model for running universal entity extraction on raw text.

    Attributes:

        input_text: The text to extract entities from. Accepted as a single
            string, a list of strings, or a dictionary whose values are
            strings.
    """

    input_text: Union[str, List[str], Dict[Any, str]]
7739
+
7740
+
7741
class ExtractorUniversalEntity(BaseModel):
    """
    A single entity produced by the universal extractor.

    Attributes:

        result: Text of the extracted entity.
        positions: Where the entity occurs in the input text, as a list of
            string-to-integer dictionaries.
            NOTE(review): the expected dictionary keys are not visible
            here - confirm against the producer.
    """

    result: str
    positions: List[Dict[str, int]]
7753
+
7754
+
7755
class ExtractorUniversalDTO(BaseModel):
    """
    Response model carrying the output of universal entity extraction.

    Attributes:

        prediction: Extracted entities organized by entity type. One of:
            a dictionary of entity lists; a list of such dictionaries; or a
            dictionary whose values are such dictionaries.
            NOTE(review): these shapes presumably mirror the str / list /
            dict forms of the text input - confirm against the caller.
    """

    # NOTE(review): member order may influence which shape validation picks,
    # depending on the pydantic version in use - confirm before reordering.
    prediction: Union[
        Dict[str, List[ExtractorUniversalEntity]],
        List[Dict[str, List[ExtractorUniversalEntity]]],
        Dict[str, Dict[str, List[ExtractorUniversalEntity]]],
    ]
7771
+
7772
+
7773
class ExtractorUniversalPageResult(NestingId):
    """
    Entity extraction output for one page.

    Extends NestingId (declared elsewhere in this module).

    Attributes:

        result: Entities found on the page, organized by entity type.
    """

    result: Dict[str, List[ExtractorUniversalEntity]]
7783
+
7784
+
7785
class ExtractorUniversalSentenceResult(NestingId):
    """
    Entity extraction output for one sentence.

    Extends NestingId (declared elsewhere in this module).

    Attributes:

        result: Entities found in the sentence, organized by entity type.
    """

    result: Dict[str, List[ExtractorUniversalEntity]]
7795
+
7796
+
7797
class ExtractorUniversalParagraphSentences(NestingId):
    """
    Entity extraction output for a paragraph, broken down by sentence.

    Extends NestingId (declared elsewhere in this module).

    Attributes:

        sentences: Sentence-level extraction results, one entry per sentence.
    """

    sentences: List[ExtractorUniversalSentenceResult]
7807
+
7808
+
7809
class ExtractorUniversalParagraphResult(NestingId):
    """
    Entity extraction output for one paragraph.

    Extends NestingId (declared elsewhere in this module).

    Attributes:

        result: Entities found in the paragraph, organized by entity type.
    """

    result: Dict[str, List[ExtractorUniversalEntity]]
7819
+
7820
+
7821
class ExtractorUniversalPageParagraphs(NestingId):
    """
    Entity extraction output for a page, broken down by paragraph.

    Extends NestingId (declared elsewhere in this module).

    Attributes:

        paragraphs: Paragraph-level extraction results. Either a list of
            paragraph results, or a list of paragraphs that are themselves
            broken down into sentence results.
    """

    # NOTE(review): member order may influence which shape validation picks,
    # depending on the pydantic version in use - confirm before reordering.
    paragraphs: Union[
        List[ExtractorUniversalParagraphResult],
        List[ExtractorUniversalParagraphSentences],
    ]
7834
+
7835
+
7836
class ExtractorUniversalDocumentDTO(BaseModel):
    """
    Response model carrying universal entity extraction output for a document.

    Attributes:

        extractor_universal: Extracted entities organized by entity type, at
            one of several levels (document, page, paragraph, sentence):
            a dictionary of entity lists for the whole document, a list of
            per-page results, or a list of pages broken down into paragraphs.
    """

    # NOTE(review): member order may influence which shape validation picks,
    # depending on the pydantic version in use - confirm before reordering.
    extractor_universal: Union[
        Dict[str, List[ExtractorUniversalEntity]],
        List[ExtractorUniversalPageResult],
        List[ExtractorUniversalPageParagraphs],
    ]
0 commit comments