@@ -48,7 +48,7 @@ class Parser(object):
48
48
logger : Logger
49
49
current_element : Dict [str , Any ]
50
50
creation_info : Dict [str , Any ]
51
- elements_build : Dict [str , Any ]
51
+ elements_built : Dict [str , Any ]
52
52
lex : SPDXLexer
53
53
yacc : LRParser
54
54
@@ -57,7 +57,7 @@ def __init__(self, **kwargs):
57
57
self .logger = Logger ()
58
58
self .current_element = {"logger" : Logger ()}
59
59
self .creation_info = {"logger" : Logger ()}
60
- self .elements_build = dict ()
60
+ self .elements_built = dict ()
61
61
self .lex = SPDXLexer ()
62
62
self .lex .build (reflags = re .UNICODE )
63
63
self .yacc = yacc .yacc (module = self , ** kwargs )
@@ -168,8 +168,8 @@ def p_external_document_ref(self, p):
168
168
external_document_ref = ExternalDocumentRef (document_r
8000
ef_id , document_uri , checksum )
169
169
self .creation_info .setdefault ("external_document_refs" , []).append (external_document_ref )
170
170
171
+ @grammar_rule ("creator : CREATOR PERSON_VALUE\n | CREATOR TOOL_VALUE\n | CREATOR ORG_VALUE" )
171
172
def p_creator (self , p ):
172
- """creator : CREATOR PERSON_VALUE\n | CREATOR TOOL_VALUE\n | CREATOR ORG_VALUE"""
173
173
self .creation_info .setdefault ("creators" , []).append (ActorParser .parse_actor (p [2 ]))
174
174
175
175
@grammar_rule ("created : CREATED DATE" )
@@ -513,36 +513,26 @@ def p_snippet_license_info(self, p):
513
513
else :
514
514
self .current_element .setdefault ("license_info_in_snippet" , []).append (p [2 ])
515
515
516
- @grammar_rule ("snip_byte_range : SNIPPET_BYTE_RANGE LINE" )
517
- def p_snippet_byte_range (self , p ):
518
- self .check_that_current_element_matches_class_for_value (Snippet , p .lineno (1 ))
519
- if "byte_range" in self .current_element :
520
- self .current_element ["logger" ].append (
521
- f"Multiple values for { p [1 ]} found. Line: { p .lineno (1 )} " )
522
- range_re = re .compile (r"^(\d+):(\d+)$" , re .UNICODE )
523
- if not range_re .match (p [2 ].strip ()):
524
- self .current_element ["logger" ].append (f"Value
8000
for SnippetByteRange doesn't match valid range pattern. "
525
- f"Line: { p .lineno (1 )} " )
516
+ @grammar_rule ("snip_byte_range : SNIPPET_BYTE_RANGE LINE\n snip_line_range : SNIPPET_LINE_RANGE LINE" )
517
+ def p_snippet_range (self , p ):
518
+ if p [1 ] == "SnippetByteRange" :
519
+ argument_name = "byte_range"
520
+ elif p [1 ] == "SnippetLineRange" :
521
+ argument_name = "line_range"
522
+ else :
526
523
return
527
- startpoint = int (p [2 ].split (":" )[0 ])
528
- endpoint = int (p [2 ].split (":" )[- 1 ])
529
- self .current_element ["byte_range" ] = startpoint , endpoint
530
-
531
- @grammar_rule ("snip_line_range : SNIPPET_LINE_RANGE LINE" )
532
- def p_snippet_line_range (self , p ):
533
524
self .check_that_current_element_matches_class_for_value (Snippet , p .lineno (1 ))
534
- if "line_range" in self .current_element :
525
+ if argument_name in self .current_element :
535
526
self .current_element ["logger" ].append (
536
527
f"Multiple values for { p [1 ]} found. Line: { p .lineno (1 )} " )
537
- return
538
528
range_re = re .compile (r"^(\d+):(\d+)$" , re .UNICODE )
539
529
if not range_re .match (p [2 ].strip ()):
540
- self .current_element ["logger" ].append (f"Value for SnippetLineRange doesn't match valid range pattern. "
530
+ self .current_element ["logger" ].append (f"Value for { p [ 1 ] } doesn't match valid range pattern. "
541
531
f"Line: { p .lineno (1 )} " )
542
532
return
543
533
startpoint = int (p [2 ].split (":" )[0 ])
544
- endpoint = int (p [2 ].split (":" )[1 ])
545
- self .current_element ["line_range" ] = startpoint , endpoint
534
+ endpoint = int (p [2 ].split (":" )[- 1 ])
535
+ self .current_element [argument_name ] = startpoint , endpoint
546
536
547
537
# parsing methods for annotation
548
538
@@ -552,8 +542,8 @@ def p_annotation_value_error(self, p):
552
542
self .current_element ["logger" ].append (
553
543
f"Error while parsing { p [1 ]} : Token did not match specified grammar rule. Line: { p .lineno (1 )} " )
554
544
545
+ @grammar_rule ("annotator : ANNOTATOR PERSON_VALUE\n | TOOL_VALUE\n | ORG_VALUE" )
555
546
def p_annotator (self , p ):
556
- """annotator : ANNOTATOR PERSON_VALUE\n | TOOL_VALUE\n | ORG_VALUE"""
557
547
self .initialize_new_current_element (Annotation )
558
548
set_value (p , self .current_element , method_to_apply = ActorParser .parse_actor )
559
549
@@ -632,63 +622,64 @@ def p_error(self, p):
632
622
pass
633
623
634
624
def parse (self , text ):
625
+ # entry point for the tag-value parser
635
626
self .yacc .parse (text , lexer = self .lex )
627
+ # this constructs the last remaining element; all other elements are constructed at the start of
628
+ # their subsequent element
636
629
self .construct_current_element ()
637
- try :
638
- raise_parsing_error_if_logger_has_messages (self .creation_info .pop ("logger" ), "CreationInfo" )
639
- except SPDXParsingError as err :
640
- self .logger .extend (err .get_messages ())
630
+
631
+ # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
632
+ # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
633
+ # logger for the creation info whose messages we need to add to the main logger to than raise all collected
634
+ # messages at once.
635
+ creation_info_logger = self .creation_info .pop ("logger" )
636
+ if creation_info_logger .has_messages ():
637
+ self .logger .extend ([f"Error while parsing CreationInfo: { creation_info_logger .get_messages ()} " ])
638
+
641
639
raise_parsing_error_if_logger_has_messages (self .logger )
642
640
creation_info = construct_or_raise_parsing_error (CreationInfo , self .creation_info )
643
- self .elements_build ["creation_info" ] = creation_info
644
- document = construct_or_raise_parsing_error (Document , self .elements_build )
641
+ self .elements_built ["creation_info" ] = creation_info
642
+ document = construct_or_raise_parsing_error (Document , self .elements_built )
645
643
return document
646
644
647
- def initialize_new_current_element (self , class_name : Any ):
645
+ def initialize_new_current_element (self , clazz : Any ):
648
646
self .construct_current_element ()
649
- self .current_element ["class" ] = class_name
647
+ self .current_element ["class" ] = clazz
650
648
651
649
def check_that_current_element_matches_class_for_value (self , expected_class , line_number ):
652
- if "class" not in self .current_element :
653
- self .logger .append (
654
- f"Element { expected_class .__name__ } is not the current element in scope, probably the expected tag to "
655
- f"start the element ({ ELEMENT_EXPECTED_START_TAG [expected_class .__name__ ]} ) is missing. "
656
- f"Line: { line_number } " )
657
- elif expected_class != self .current_element ["class" ]:
650
+ if "class" not in self .current_element or expected_class != self .current_element ["class" ]:
658
651
self .logger .append (
659
652
f"Element { expected_class .__name__ } is not the current element in scope, probably the expected tag to "
660
653
f"start the element ({ ELEMENT_EXPECTED_START_TAG [expected_class .__name__ ]} ) is missing. "
661
654
f"Line: { line_number } " )
662
655
663
656
def construct_current_element (self ):
664
657
if "class" not in self .current_element :
665
- self .current_element = {"logger" : Logger ()}
666
- return
667
- class_name = self .current_element .pop ("class" )
668
- try :
669
- raise_parsing_error_if_logger_has_messages (self .current_element .pop ("logger" ), class_name .__name__ )
670
- except SPDXParsingError as err :
671
- self .logger .extend (err .get_messages ())
672
- self .current_element = {"logger" : Logger ()}
658
+ # When the first element of the document is instantiated we don't have a current element in scope
659
+ # and the key "class" doesn't exist. Additionally, if the first element doesn't have the expected start
660
+ # value the key "class" wouldn't exist. To prevent a KeyError we use early return.
673
661
return
662
+
663
+ clazz = self .current_element .pop ("class" )
674
664
try :
675
- self .elements_build .setdefault (CLASS_MAPPING [class_name .__name__ ], []).append (
676
- construct_or_raise_parsing_error (class_name , self .current_element ))
677
- if class_name == File :
665
+ raise_parsing_error_if_logger_has_messages (self .current_element .pop ("logger" ), clazz .__name__ )
666
+ self .elements_built .setdefault (CLASS_MAPPING [clazz .__name__ ], []).append (
667
+ construct_or_raise_parsing_error (clazz , self .current_element ))
668
+ if clazz == File :
678
669
self .check_for_preceding_package_and_build_contains_relationship ()
679
670
except SPDXParsingError as err :
680
671
self .logger .extend (err .get_messages ())
681
672
self .current_element = {"logger" : Logger ()}
682
673
683
674
def check_for_preceding_package_and_build_contains_relationship (self ):
684
675
file_spdx_id = self .current_element ["spdx_id" ]
685
- if "packages" not in self .elements_build :
676
+ if "packages" not in self .elements_built :
686
677
return
687
678
# We assume that all files that are not contained in a package precede any package information. Any file
688
679
# information that follows any package information is assigned to the last parsed package by creating a
689
680
# corresponding contains relationship.
690
681
# (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
691
- package_spdx_id = self .elements_build ["packages" ][- 1 ].spdx_id
682
+ package_spdx_id = self .elements_built ["packages" ][- 1 ].spdx_id
692
683
relationship = Relationship (package_spdx_id , RelationshipType .CONTAINS , file_spdx_id )
693
- if relationship not in self .elements_build .setdefault ("relationships" , []):
694
- self .elements_build . setdefault ( "relationships" , []) .append (relationship )
684
+ if relationship not in self .elements_built .setdefault ("relationships" , []):
685
+ self .elements_built [ "relationships" ] .append (relationship )
0 commit comments