squashed review commits · HarshvMahawar/tools-python@e5af2eb · GitHub

Commit e5af2eb

squashed review commits

[review] fix parsing of external document ref
[review] use only one dictionary
[review] return if multiple values for snippet range found

Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>

1 parent af58dfa commit e5af2eb

File tree: 6 files changed (+55, -65 lines)

src/spdx/parser/tagvalue/helper_methods.py

Lines changed: 1 addition & 0 deletions
@@ -105,6 +105,7 @@ def get_property_name(tag: str):
     "SnippetComment": (Snippet, "comment"), "SnippetCopyrightText": (Snippet, "copyright_text"),
     "SnippetLicenseComments": (Snippet, "license_comment"), "SnippetLicenseConcluded": (Snippet, "license_concluded"),
     "SnippetByteRange": (Snippet, "byte_range"), "SnippetLineRange": (Snippet, "line_range"),
+    "Annotator": (Annotation, "annotator"),
     "SPDXREF": (Annotation, "spdx_id"), "AnnotationComment": (Annotation, "annotation_comment"),
     "LicenseID": (ExtractedLicensingInfo, "license_id"), "ExtractedText": (ExtractedLicensingInfo, "extracted_text"),
     "LicenseComment": (ExtractedLicensingInfo, "comment"), "LicenseName": (ExtractedLicensingInfo, "license_name")

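The added "Annotator" entry is what makes the single-dictionary lookup in parser.py (further down) possible: the element class for an expected start tag can be read straight out of TAG_DATA_MODEL_FIELD instead of a second, reversed dictionary. A minimal, self-contained sketch of that lookup, using simplified stand-in classes and entries rather than the real spdx model imports:

    class File: ...
    class Annotation: ...

    # Simplified stand-ins for the real mappings in helper_methods.py / parser.py.
    TAG_DATA_MODEL_FIELD = {
        "FileName": (File, "name"),
        "Annotator": (Annotation, "annotator"),
    }
    ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator")

    def element_class_for_start_tag(tag):
        # One dictionary is enough: check whether the tag opens a new element,
        # then reuse the (class, field) mapping to get the element class.
        if tag in ELEMENT_EXPECTED_START_TAG.values():
            return TAG_DATA_MODEL_FIELD[tag][0]
        return None

    assert element_class_for_start_tag("Annotator") is Annotation
    assert element_class_for_start_tag("AnnotationComment") is None
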
src/spdx/parser/tagvalue/lexer.py

Lines changed: 1 addition & 19 deletions
@@ -109,10 +109,7 @@ class SPDXLexer(object):
         "PERSON_VALUE",
         "DATE",
         "LINE",
-        "CHECKSUM",
-        "EXT_DOC_REF_ID",
-        "EXT_DOC_URI",
-        "EXT_DOC_REF_CHECKSUM",
+        "CHECKSUM"
     ] + list(reserved.values())

     def __init__(self):

@@ -145,21 +142,6 @@ def t_CHECKSUM(self, t):
         t.value = t.value[1:].strip()
         return t

-    @TOKEN(r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)")
-    def t_EXT_DOC_REF_ID(self, t):
-        t.value = t.value[1:].strip()
-        return t
-
-    @TOKEN(r"\s*((ht|f)tps?:\/\/\S*)")
-    def t_EXT_DOC_URI(self, t):
-        t.value = t.value.strip()
-        return t
-
-    @TOKEN(r"\s*SHA1:\s*[a-f0-9]{40}")
-    def t_EXT_DOC_REF_CHECKSUM(self, t):
-        t.value = t.value[1:].strip()
-        return t
-
     @TOKEN(r":\s*Tool:.+")
     def t_TOOL_VALUE(self, t):
         t.value = t.value[1:].strip()

src/spdx/parser/tagvalue/parser.py

Lines changed: 22 additions & 23 deletions
@@ -42,9 +42,6 @@
                      Package="packages", ExtractedLicensingInfo="extracted_licensing_info")
 ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator", Relationship="Relationship",
                                   Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID")
-EXPECTED_START_TAG_ELEMENT = {"FileName": File, "PackageName": Package, "Annotator": Annotation,
-                              "Relationship": Relationship, "SnippetSPDXID": Snippet,
-                              "LicenseID": ExtractedLicensingInfo}


 class Parser(object):
@@ -135,8 +132,8 @@ def p_attrib(self, p):
                  "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
                  "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error")
     def p_current_element_error(self, p):
-        if p[1] in EXPECTED_START_TAG_ELEMENT.keys():
-            self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]])
+        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
+            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
         self.current_element["logger"].append(
             f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}")

@@ -167,8 +164,8 @@ def p_current_element_error(self, p):
                  "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
                  "annotation_comment : ANNOTATION_COMMENT text_or_line")
     def p_generic_value(self, p):
-        if p[1] in EXPECTED_START_TAG_ELEMENT.keys():
-            self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]])
+        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
+            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
         if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
             set_value(p, self.current_element)

@@ -232,11 +229,22 @@ def p_generic_value_creation_info(self, p):
     def p_license_list_version(self, p):
         set_value(p, self.creation_info, method_to_apply=Version.from_string)

-    @grammar_rule("ext_doc_ref : EXT_DOC_REF EXT_DOC_REF_ID EXT_DOC_URI EXT_DOC_REF_CHECKSUM")
+    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
     def p_external_document_ref(self, p):
-        document_ref_id = p[2]
-        document_uri = p[3]
-        checksum = parse_checksum(p[4])
+        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
+        external_doc_ref_match = external_doc_ref_regex.match(p[2])
+        if not external_doc_ref_match:
+            self.creation_info["logger"].append(
+                f"Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. Line: {p.lineno(1)}")
+            return
+        try:
+            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
+        except ValueError:
+            self.creation_info["logger"].append(
+                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
+                f"document_ref_id and document_uri. Line: {p.lineno(1)}")
+            return
+        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
         external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
         self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

@@ -415,6 +423,7 @@ def p_snippet_range(self, p):
         if argument_name in self.current_element:
             self.current_element["logger"].append(
                 f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
+            return
         range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
         if not range_re.match(p[2].strip()):
             self.current_element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. "
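The added return is the "return if multiple values for snippet range found" fix from the commit message: a duplicate range tag is now only logged and the first value is kept. A small standalone sketch of that behavior (the dict-based element and the helper below are illustrative stand-ins, not the parser's real internals):

    import re

    def set_snippet_range(current_element, tag, value, line_number):
        # Illustrative mapping; the real parser derives the field name from TAG_DATA_MODEL_FIELD.
        argument_name = {"SnippetByteRange": "byte_range", "SnippetLineRange": "line_range"}[tag]
        if argument_name in current_element:
            current_element.setdefault("logger", []).append(
                f"Multiple values for {tag} found. Line: {line_number}")
            return  # new behavior: stop here instead of re-validating or overwriting
        if not re.match(r"^(\d+):(\d+)$", value.strip()):
            current_element.setdefault("logger", []).append(
                f"Value for {tag} doesn't match valid range pattern. Line: {line_number}")
            return
        start, end = value.strip().split(":")
        current_element[argument_name] = (int(start), int(end))

    element = {}
    set_snippet_range(element, "SnippetByteRange", "310:420", 5)
    set_snippet_range(element, "SnippetByteRange", "100:200", 6)  # duplicate: logged, first value kept
    assert element["byte_range"] == (310, 420)
    assert element["logger"] == ["Multiple values for SnippetByteRange found. Line: 6"]
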
@@ -443,8 +452,8 @@ def p_annotation_type(self, p):

     # parsing methods for relationship

-    @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n "
-                  "| RELATIONSHIP relationship_value")
+    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n "
+                  "| RELATIONSHIP LINE")
     def p_relationship(self, p):
         self.initialize_new_current_element(Relationship)
         try:
@@ -467,16 +476,6 @@ def p_relationship(self, p):
         if len(p) == 5:
             self.current_element["comment"] = p[4]

-    @grammar_rule("relationship_value : EXT_DOC_REF_ID LINE")
-    def p_relationship_value_with_doc_ref(self, p):
-
-        p[0] = p[1] + ":" + p[2]
-
-    @grammar_rule("relationship_value : LINE")
-    def p_relationship_value_without_doc_ref(self, p):
-
-        p[0] = p[1]
-
     def p_error(self, p):
         pass

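Together with the lexer change above, an ExternalDocumentRef value now reaches the parser as a single LINE token and is split there with a regex. A self-contained sketch of that splitting (the standalone helper is illustrative; in the code above this happens inside p_external_document_ref):

    import re

    # Same pattern as in p_external_document_ref: everything before a trailing
    # "SHA1: <40 hex chars>" is the ref id plus the document URI.
    EXTERNAL_DOC_REF_REGEX = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")

    def split_external_document_ref(value):
        match = EXTERNAL_DOC_REF_REGEX.match(value)
        if not match:
            raise ValueError("Couldn't match Checksum.")
        # The first group holds "<document_ref_id> <document_uri>"; split on the single space.
        document_ref_id, document_uri = match.group(1).strip().split(" ")
        return document_ref_id, document_uri, match.group(2).strip()

    line = ("DocumentRef-spdx-tool-2.1 "
            "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 "
            "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759")
    ref_id, uri, checksum_part = split_external_document_ref(line)
    assert ref_id == "DocumentRef-spdx-tool-2.1"
    assert checksum_part == "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"
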
tests/spdx/parser/tagvalue/test_creation_info_parser.py

Lines changed: 16 additions & 14 deletions
@@ -69,20 +69,22 @@ def test_parse_creation_info():
          "Creator: Person Bob (bob@example.com)", "Creator: Organization: Acme [email]",
          "Created: 2010-02-03T00:00:0Z", "CreatorComment: <text>Sample Comment</text>",
          "LicenseListVersion: 7"]),
-     "Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: "
-     "Token did not match specified grammar rule. Line: 6', 'Error while parsing "
-     "ExternalDocumentRef: Token did not match specified grammar rule. Line: 7', "
-     "'Error while parsing Creator: Token did not match specified grammar rule. Line: 8', "
-     "'Error while parsing Created: Token did not match specified grammar rule. Line: 10', "
-     "'7 is not a valid version string']"),
-    ("\n".join(
-        ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3",
-         "SPDXID: SPDXRef-DOCUMENT"]),
-     r"__init__() missing 3 required positional arguments: 'document_namespace', "
-     r"'creators', and 'created'"),
-    ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7",
-     "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. "
-     "Line: 2']")]))
+     ("Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: "
+      'Token did not match specified grammar rule. Line: 6\', "Error while parsing '
+      "ExternalDocumentRef: Couldn't split the first part of the value into "
+      'document_ref_id and document_uri. Line: 7", \'Error while parsing Creator: '
+      "Token did not match specified grammar rule. Line: 8', 'Error while parsing "
+      "Created: Token did not match specified grammar rule. Line: 10', '7 is not a "
+      "valid version string']")),
+    ("\n".join(
+        ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3",
+         "SPDXID: SPDXRef-DOCUMENT"]),
+     r"__init__() missing 3 required positional arguments: 'document_namespace', 'creators', and 'created'"),
+    ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7",
+     "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. Line: 2']"),
+    ("ExternalDocumentRef: Document_ref document_uri SHA1: afded",
+     'Error while parsing CreationInfo: ["Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. Line: 1"]'
+     )]))
 def test_parse_invalid_creation_info(document_str, expected_message):
     parser = Parser()
     with pytest.raises(SPDXParsingError) as err:

tests/spdx/parser/tagvalue/test_relationship_parser.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
      SpdxNoAssertion())),
     ("Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE",
      Relationship("SPDXRef-CarolCompression", RelationshipType.DEPENDS_ON, SpdxNone())),
-    ("Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef",
+    ("Relationship: DocumentRef-ExternalDocument:SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef",
     Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON,
                  "DocumentRef:AnotherRef"))
 ])

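The adjusted test input reflects the grammar simplification above: with the relationship_value rules gone, the whole value reaches p_relationship as one LINE token, so the DocumentRef prefix has to be attached to the SPDX ID without a space for a plain split on spaces to recover the three parts. An illustrative split, not necessarily the parser's exact code:

    line = "DocumentRef-ExternalDocument:SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef"
    spdx_element_id, relationship_type, related_spdx_element_id = line.split(" ")
    assert spdx_element_id == "DocumentRef-ExternalDocument:SPDXRef-Test"
    assert relationship_type == "DEPENDS_ON"
    assert related_spdx_element_id == "DocumentRef:AnotherRef"
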
tests/spdx/parser/tagvalue/test_tag_value_lexer.py

Lines changed: 14 additions & 8 deletions
@@ -53,15 +53,18 @@ def test_tokenization_of_document(lexer):


 def test_tokenization_of_external_document_references(lexer):
-    data = """
-    ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759
-    """
+    data = "\n".join([
+        "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+        "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"])
     lexer.input(data)
+    token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 1)
+    token_assert_helper(lexer.token(), "LINE",
+                        "DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+                        1)
     token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_REF_ID", "DocumentRef-spdx-tool-2.1", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_URI", "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25"
-                                                      "04E0-4F89-41D3-9A0C-0305E82C3301", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_REF_CHECKSUM", "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", 2)
+    token_assert_helper(lexer.token(), "LINE",
+                        "DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+                        2)


 def test_tokenization_of_file(lexer):
@@ -277,10 +280,13 @@ def test_tokenization_of_annotation(lexer):

 def test_tokenization_of_relationship(lexer):
     relationship_str = "\n".join(["Relationship: SPDXRef-DOCUMENT DESCRIBES NONE",
-                                  "RelationshipComment: This is a comment."])
+                                  "RelationshipComment: This is a comment.",
+                                  "Relationship: DocumentRef-extern:SPDXRef-Package DESCRIBES NONE"])

     lexer.input(relationship_str)
     token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 1)
     token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT DESCRIBES NONE", 1)
     token_assert_helper(lexer.token(), "RELATIONSHIP_COMMENT", "RelationshipComment", 2)
     token_assert_helper(lexer.token(), "LINE", "This is a comment.", 2)
+    token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 3)
+    token_assert_helper(lexer.token(), "LINE", "DocumentRef-extern:SPDXRef-Package DESCRIBES NONE", 3)

0 commit comments