8000 squashed review commits concerning the tag value parser · HarshvMahawar/tools-python@bafc163 · GitHub
[go: up one dir, main page]

Skip to content

Commit bafc163

Browse files
committed
squashed review commits concerning the tag value parser
[review] add comments to parser to improve code readability [review] merge parsing methods for byte_range and line_range [review] delete superfluous except block [review] delete superfluous call to setdefault [review] delete superfluous case distinction [review] rename parameter [review] get rid of docstrings [review] rename Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
1 parent 4e279f6 commit bafc163

File tree

1 file changed

+45
-54
lines changed

1 file changed

+45
-54
lines changed

src/spdx/parser/tagvalue/parser/tagvalue.py

Lines changed: 45 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class Parser(object):
4848
logger: Logger
4949
current_element: Dict[str, Any]
5050
creation_info: Dict[str, Any]
51-
elements_build: Dict[str, Any]
51+
elements_built: Dict[str, Any]
5252
lex: SPDXLexer
5353
yacc: LRParser
5454

@@ -57,7 +57,7 @@ def __init__(self, **kwargs):
5757
self.logger = Logger()
5858
self.current_element = {"logger": Logger()}
5959
self.creation_info = {"logger": Logger()}
60-
self.elements_build = dict()
60+
self.elements_built = dict()
6161
self.lex = SPDXLexer()
6262
self.lex.build(reflags=re.UNICODE)
6363
self.yacc = yacc.yacc(module=self, **kwargs)
@@ -168,8 +168,8 @@ def p_external_document_ref(self, p):
168168
external_document_ref = ExternalDocumentRef(document_r 8000 ef_id, document_uri, checksum)
169169
self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
170170

171+
@grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE")
171172
def p_creator(self, p):
172-
"""creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE"""
173173
self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))
174174

175175
@grammar_rule("created : CREATED DATE")
@@ -513,36 +513,26 @@ def p_snippet_license_info(self, p):
513513
else:
514514
self.current_element.setdefault("license_info_in_snippet", []).append(p[2])
515515

516-
@grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE")
517-
def p_snippet_byte_range(self, p):
518-
self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1))
519-
if "byte_range" in self.current_element:
520-
self.current_element["logger"].append(
521-
f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
522-
range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
523-
if not range_re.match(p[2].strip()):
524-
self.current_element["logger"].append(f"Value 8000 for SnippetByteRange doesn't match valid range pattern. "
525-
f"Line: {p.lineno(1)}")
516+
@grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE\n snip_line_range : SNIPPET_LINE_RANGE LINE")
517+
def p_snippet_range(self, p):
518+
if p[1] == "SnippetByteRange":
519+
argument_name = "byte_range"
520+
elif p[1] == "SnippetLineRange":
521+
argument_name = "line_range"
522+
else:
526523
return
527-
startpoint = int(p[2].split(":")[0])
528-
endpoint = int(p[2].split(":")[-1])
529-
self.current_element["byte_range"] = startpoint, endpoint
530-
531-
@grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE")
532-
def p_snippet_line_range(self, p):
533524
self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1))
534-
if "line_range" in self.current_element:
525+
if argument_name in self.current_element:
535526
self.current_element["logger"].append(
536527
f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
537-
return
538528
range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
539529
if not range_re.match(p[2].strip()):
540-
self.current_element["logger"].append(f"Value for SnippetLineRange doesn't match valid range pattern. "
530+
self.current_element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. "
541531
f"Line: {p.lineno(1)}")
542532
return
543533
startpoint = int(p[2].split(":")[0])
544-
endpoint = int(p[2].split(":")[1])
545-
self.current_element["line_range"] = startpoint, endpoint
534+
endpoint = int(p[2].split(":")[-1])
535+
self.current_element[argument_name] = startpoint, endpoint
546536

547537
# parsing methods for annotation
548538

@@ -552,8 +542,8 @@ def p_annotation_value_error(self, p):
552542
self.current_element["logger"].append(
553543
f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}")
554544

545+
@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE")
555546
def p_annotator(self, p):
556-
"""annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE"""
557547
self.initialize_new_current_element(Annotation)
558548
set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)
559549

@@ -632,63 +622,64 @@ def p_error(self, p):
632622
pass
633623

634624
def parse(self, text):
625+
# entry point for the tag-value parser
635626
self.yacc.parse(text, lexer=self.lex)
627+
# this constructs the last remaining element; all other elements are constructed at the start of
628+
# their subsequent element
636629
self.construct_current_element()
637-
try:
638-
raise_parsing_error_if_logger_has_messages(self.creation_info.pop("logger"), "CreationInfo")
639-
except SPDXParsingError as err:
640-
self.logger.extend(err.get_messages())
630+
631+
# To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
632+
# two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
633+
# logger for the creation info whose messages we need to add to the main logger to than raise all collected
634+
# messages at once.
635+
creation_info_logger = self.creation_info.pop("logger")
636+
if creation_info_logger.has_messages():
637+
self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])
638+
641639
raise_parsing_error_if_logger_has_messages(self.logger)
642640
creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
643-
self.elements_build["creation_info"] = creation_info
644-
document = construct_or_raise_parsing_error(Document, self.elements_build)
641+
self.elements_built["creation_info"] = creation_info
642+
document = construct_or_raise_parsing_error(Document, self.elements_built)
645643
return document
646644

647-
def initialize_new_current_element(self, class_name: Any):
645+
def initialize_new_current_element(self, clazz: Any):
648646
self.construct_current_element()
649-
self.current_element["class"] = class_name
647+
self.current_element["class"] = clazz
650648

651649
def check_that_current_element_matches_class_for_value(self, expected_class, line_number):
652-
if "class" not in self.current_element:
653-
self.logger.append(
654-
f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
655-
f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
656-
f"Line: {line_number}")
657-
elif expected_class != self.current_element["class"]:
650+
if "class" not in self.current_element or expected_class != self.current_element["class"]:
658651
self.logger.append(
659652
f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
660653
f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
661654
f"Line: {line_number}")
662655

663656
def construct_current_element(self):
664657
if "class" not in self.current_element:
665-
self.current_element = {"logger": Logger()}
666-
return
667-
class_name = self.current_element.pop("class")
668-
try:
669-
raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), class_name.__name__)
670-
except SPDXParsingError as err:
671-
self.logger.extend(err.get_messages())
672-
self.current_element = {"logger": Logger()}
658+
# When the first element of the document is instantiated we don't have a current element in scope
659+
# and the key "class" doesn't exist. Additionally, if the first element doesn't have the expected start
660+
# value the key "class" wouldn't exist. To prevent a KeyError we use early return.
673661
return
662+
663+
clazz = self.current_element.pop("class")
674664
try:
675-
self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append(
676-
construct_or_raise_parsing_error(class_name, self.current_element))
677-
if class_name == File:
665+
raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
666+
self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
667+
construct_or_raise_parsing_error(clazz, self.current_element))
668+
if clazz == File:
678669
self.check_for_preceding_package_and_build_contains_relationship()
679670
except SPDXParsingError as err:
680671
self.logger.extend(err.get_messages())
681672
self.current_element = {"logger": Logger()}
682673

683674
def check_for_preceding_package_and_build_contains_relationship(self):
684675
file_spdx_id = self.current_element["spdx_id"]
685-
if "packages" not in self.elements_build:
676+
if "packages" not in self.elements_built:
686677
return
687678
# We assume that all files that are not contained in a package precede any package information. Any file
688679
# information that follows any package information is assigned to the last parsed package by creating a
689680
# corresponding contains relationship.
690681
# (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
691-
package_spdx_id = self.elements_build["packages"][-1].spdx_id
682+
package_spdx_id = self.elements_built["packages"][-1].spdx_id
692683
relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
693-
if relationship not in self.elements_build.setdefault("relationships", []):
694-
self.elements_build.setdefault("relationships", []).append(relationship)
684+
if relationship not in self.elements_built.setdefault("relationships", []):
685+
self.elements_built["relationships"].append(relationship)

0 commit comments

Comments
 (0)
0