squashed review commits · HarshvMahawar/tools-python@e5af2eb · GitHub

Commit e5af2eb

squashed review commits

[review] fix parsing of external document ref
[review] use only one dictionary
[review] return if multiple values for snippet range found

Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>

1 parent af58dfa commit e5af2eb

File tree: 6 files changed (+55, -65 lines)

src/spdx/parser/tagvalue/helper_methods.py

Lines changed: 1 addition & 0 deletions
@@ -105,6 +105,7 @@ def get_property_name(tag: str):
     "SnippetComment": (Snippet, "comment"), "SnippetCopyrightText": (Snippet, "copyright_text"),
     "SnippetLicenseComments": (Snippet, "license_comment"), "SnippetLicenseConcluded": (Snippet, "license_concluded"),
     "SnippetByteRange": (Snippet, "byte_range"), "SnippetLineRange": (Snippet, "line_range"),
+    "Annotator": (Annotation, "annotator"),
     "SPDXREF": (Annotation, "spdx_id"), "AnnotationComment": (Annotation, "annotation_comment"),
     "LicenseID": (ExtractedLicensingInfo, "license_id"), "ExtractedText": (ExtractedLicensingInfo, "extracted_text"),
     "LicenseComment": (ExtractedLicensingInfo, "comment"), "LicenseName": (ExtractedLicensingInfo, "license_name")

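The added "Annotator" entry is what makes the single-dictionary lookup in parser.py (further down) possible: the element class for an expected start tag can be read straight out of TAG_DATA_MODEL_FIELD instead of a second, reversed dictionary. A minimal, self-contained sketch of that lookup, using simplified stand-in classes and entries rather than the real spdx model imports:

    class File: ...
    class Annotation: ...

    # Simplified stand-ins for the real mappings in helper_methods.py / parser.py.
    TAG_DATA_MODEL_FIELD = {
        "FileName": (File, "name"),
        "Annotator": (Annotation, "annotator"),
    }
    ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator")

    def element_class_for_start_tag(tag):
        # One dictionary is enough: check whether the tag opens a new element,
        # then reuse the (class, field) mapping to get the element class.
        if tag in ELEMENT_EXPECTED_START_TAG.values():
            return TAG_DATA_MODEL_FIELD[tag][0]
        return None

    assert element_class_for_start_tag("Annotator") is Annotation
    assert element_class_for_start_tag("AnnotationComment") is None
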
src/spdx/parser/tagvalue/lexer.py

Lines changed: 1 addition & 19 deletions
@@ -109,10 +109,7 @@ class SPDXLexer(object):
         "PERSON_VALUE",
         "DATE",
         "LINE",
-        "CHECKSUM",
-        "EXT_DOC_REF_ID",
-        "EXT_DOC_URI",
-        "EXT_DOC_REF_CHECKSUM",
+        "CHECKSUM"
     ] + list(reserved.values())

     def __init__(self):

@@ -145,21 +142,6 @@ def t_CHECKSUM(self, t):
         t.value = t.value[1:].strip()
         return t

-    @TOKEN(r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)")
-    def t_EXT_DOC_REF_ID(self, t):
-        t.value = t.value[1:].strip()
-        return t
-
-    @TOKEN(r"\s*((ht|f)tps?:\/\/\S*)")
-    def t_EXT_DOC_URI(self, t):
-        t.value = t.value.strip()
-        return t
-
-    @TOKEN(r"\s*SHA1:\s*[a-f0-9]{40}")
-    def t_EXT_DOC_REF_CHECKSUM(self, t):
-        t.value = t.value[1:].strip()
-        return t
-
     @TOKEN(r":\s*Tool:.+")
     def t_TOOL_VALUE(self, t):
         t.value = t.value[1:].strip()

src/spdx/parser/tagvalue/parser.py

Lines changed: 22 additions & 23 deletions
@@ -42,9 +42,6 @@
                      Package="packages", ExtractedLicensingInfo="extracted_licensing_info")
 ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator", Relationship="Relationship",
                                   Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID")
-EXPECTED_START_TAG_ELEMENT = {"FileName": File, "PackageName": Package, "Annotator": Annotation,
-                              "Relationship": Relationship, "SnippetSPDXID": Snippet,
-                              "LicenseID": ExtractedLicensingInfo}


 class Parser(object):
@@ -135,8 +132,8 @@ def p_attrib(self, p):
                  "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
                  "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error")
     def p_current_element_error(self, p):
-        if p[1] in EXPECTED_START_TAG_ELEMENT.keys():
-            self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]])
+        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
+            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
         self.current_element["logger"].append(
             f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}")

@@ -167,8 +164,8 @@ def p_current_element_error(self, p):
                  "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
                  "annotation_comment : ANNOTATION_COMMENT text_or_line")
     def p_generic_value(self, p):
-        if p[1] in EXPECTED_START_TAG_ELEMENT.keys():
-            self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]])
+        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
+            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
         if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
             set_value(p, self.current_element)

@@ -232,11 +229,22 @@ def p_generic_value_creation_info(self, p):
     def p_license_list_version(self, p):
         set_value(p, self.creation_info, method_to_apply=Version.from_string)

-    @grammar_rule("ext_doc_ref : EXT_DOC_REF EXT_DOC_REF_ID EXT_DOC_URI EXT_DOC_REF_CHECKSUM")
+    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
     def p_external_document_ref(self, p):
-        document_ref_id = p[2]
-        document_uri = p[3]
-        checksum = parse_checksum(p[4])
+        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
+        external_doc_ref_match = external_doc_ref_regex.match(p[2])
+        if not external_doc_ref_match:
+            self.creation_info["logger"].append(
+                f"Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. Line: {p.lineno(1)}")
+            return
+        try:
+            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
+        except ValueError:
+            self.creation_info["logger"].append(
+                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
+                f"document_ref_id and document_uri. Line: {p.lineno(1)}")
+            return
+        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
         external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
         self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

@@ -415,6 +423,7 @@ def p_snippet_range(self, p):
         if argument_name in self.current_element:
             self.current_element["logger"].append(
                 f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
+            return
         range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
         if not range_re.match(p[2].strip()):
             self.current_element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. "
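The added return is the "return if multiple values for snippet range found" fix from the commit message: a duplicate range tag is now only logged and the first value is kept. A small standalone sketch of that behavior (the dict-based element and the helper below are illustrative stand-ins, not the parser's real internals):

    import re

    def set_snippet_range(current_element, tag, value, line_number):
        # Illustrative mapping; the real parser derives the field name from TAG_DATA_MODEL_FIELD.
        argument_name = {"SnippetByteRange": "byte_range", "SnippetLineRange": "line_range"}[tag]
        if argument_name in current_element:
            current_element.setdefault("logger", []).append(
                f"Multiple values for {tag} found. Line: {line_number}")
            return  # new behavior: stop here instead of re-validating or overwriting
        if not re.match(r"^(\d+):(\d+)$", value.strip()):
            current_element.setdefault("logger", []).append(
                f"Value for {tag} doesn't match valid range pattern. Line: {line_number}")
            return
        start, end = value.strip().split(":")
        current_element[argument_name] = (int(start), int(end))

    element = {}
    set_snippet_range(element, "SnippetByteRange", "310:420", 5)
    set_snippet_range(element, "SnippetByteRange", "100:200", 6)  # duplicate: logged, first value kept
    assert element["byte_range"] == (310, 420)
    assert element["logger"] == ["Multiple values for SnippetByteRange found. Line: 6"]
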
@@ -443,8 +452,8 @@ def p_annotation_type(self, p):

     # parsing methods for relationship

-    @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n "
-                  "| RELATIONSHIP relationship_value")
+    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n "
+                  "| RELATIONSHIP LINE")
     def p_relationship(self, p):
         self.initialize_new_current_element(Relationship)
         try:
@@ -467,16 +476,6 @@ def p_relationship(self, p):
         if len(p) == 5:
             self.current_element["comment"] = p[4]

-    @grammar_rule("relationship_value : EXT_DOC_REF_ID LINE")
-    def p_relationship_value_with_doc_ref(self, p):
-
-        p[0] = p[1] + ":" + p[2]
-
-    @grammar_rule("relationship_value : LINE")
-    def p_relationship_value_without_doc_ref(self, p):
-
-        p[0] = p[1]
-
     def p_error(self, p):
         pass

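Together with the lexer change above, an ExternalDocumentRef value now reaches the parser as a single LINE token and is split there with a regex. A self-contained sketch of that splitting (the standalone helper is illustrative; in the code above this happens inside p_external_document_ref):

    import re

    # Same pattern as in p_external_document_ref: everything before a trailing
    # "SHA1: <40 hex chars>" is the ref id plus the document URI.
    EXTERNAL_DOC_REF_REGEX = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")

    def split_external_document_ref(value):
        match = EXTERNAL_DOC_REF_REGEX.match(value)
        if not match:
            raise ValueError("Couldn't match Checksum.")
        # The first group holds "<document_ref_id> <document_uri>"; split on the single space.
        document_ref_id, document_uri = match.group(1).strip().split(" ")
        return document_ref_id, document_uri, match.group(2).strip()

    line = ("DocumentRef-spdx-tool-2.1 "
            "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 "
            "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759")
    ref_id, uri, checksum_part = split_external_document_ref(line)
    assert ref_id == "DocumentRef-spdx-tool-2.1"
    assert checksum_part == "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"
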
tests/spdx/parser/tagvalue/test_creation_info_parser.py

Lines changed: 16 additions & 14 deletions
@@ -69,20 +69,22 @@ def test_parse_creation_info():
          "Creator: Person Bob (bob@example.com)", "Creator: Organization: Acme [email]",
          "Created: 2010-02-03T00:00:0Z", "CreatorComment: <text>Sample Comment</text>",
          "LicenseListVersion: 7"]),
-     "Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: "
-     "Token did not match specified grammar rule. Line: 6', 'Error while parsing "
-     "ExternalDocumentRef: Token did not match specified grammar rule. Line: 7', "
-     "'Error while parsing Creator: Token did not match specified grammar rule. Line: 8', "
-     "'Error while parsing Created: Token did not match specified grammar rule. Line: 10', "
-     "'7 is not a valid version string']"),
-    ("\n".join(
-        ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3",
-         "SPDXID: SPDXRef-DOCUMENT"]),
-     r"__init__() missing 3 required positional arguments: 'document_namespace', "
-     r"'creators', and 'created'"),
-    ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7",
-     "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. "
-     "Line: 2']")]))
+     ("Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: "
+      'Token did not match specified grammar rule. Line: 6\', "Error while parsing '
+      "ExternalDocumentRef: Couldn't split the first part of the value into "
+      'document_ref_id and document_uri. Line: 7", \'Error while parsing Creator: '
+      "Token did not match specified grammar rule. Line: 8', 'Error while parsing "
+      "Created: Token did not match specified grammar rule. Line: 10', '7 is not a "
+      "valid version string']")),
+    ("\n".join(
+        ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3",
+         "SPDXID: SPDXRef-DOCUMENT"]),
+     r"__init__() missing 3 required positional arguments: 'document_namespace', 'creators', and 'created'"),
+    ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7",
+     "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. Line: 2']"),
+    ("ExternalDocumentRef: Document_ref document_uri SHA1: afded",
+     'Error while parsing CreationInfo: ["Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. Line: 1"]'
+     )]))
 def test_parse_invalid_creation_info(document_str, expected_message):
     parser = Parser()
     with pytest.raises(SPDXParsingError) as err:

tests/spdx/parser/tagvalue/test_relationship_parser.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
      SpdxNoAssertion())),
     ("Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE",
      Relationship("SPDXRef-CarolCompression", RelationshipType.DEPENDS_ON, SpdxNone())),
-    ("Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef",
+    ("Relationship: DocumentRef-ExternalDocument:SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef",
     Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON,
                  "DocumentRef:AnotherRef"))
 ])

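The adjusted test input reflects the grammar simplification above: with the relationship_value rules gone, the whole value reaches p_relationship as one LINE token, so the DocumentRef prefix has to be attached to the SPDX ID without a space for a plain split on spaces to recover the three parts. An illustrative split, not necessarily the parser's exact code:

    line = "DocumentRef-ExternalDocument:SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef"
    spdx_element_id, relationship_type, related_spdx_element_id = line.split(" ")
    assert spdx_element_id == "DocumentRef-ExternalDocument:SPDXRef-Test"
    assert relationship_type == "DEPENDS_ON"
    assert related_spdx_element_id == "DocumentRef:AnotherRef"
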
tests/spdx/parser/tagvalue/test_tag_value_lexer.py

Lines changed: 14 additions & 8 deletions
@@ -53,15 +53,18 @@ def test_tokenization_of_document(lexer):


 def test_tokenization_of_external_document_references(lexer):
-    data = """
-    ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759
-    """
+    data = "\n".join([
+        "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+        "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"])
     lexer.input(data)
+    token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 1)
+    token_assert_helper(lexer.token(), "LINE",
+                        "DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+                        1)
     token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_REF_ID", "DocumentRef-spdx-tool-2.1", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_URI", "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25"
-                                                      "04E0-4F89-41D3-9A0C-0305E82C3301", 2)
-    token_assert_helper(lexer.token(), "EXT_DOC_REF_CHECKSUM", "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", 2)
+    token_assert_helper(lexer.token(), "LINE",
+                        "DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
+                        2)


 def test_tokenization_of_file(lexer):
@@ -277,10 +280,13 @@ def test_tokenization_of_annotation(lexer):

 def test_tokenization_of_relationship(lexer):
     relationship_str = "\n".join(["Relationship: SPDXRef-DOCUMENT DESCRIBES NONE",
-                                  "RelationshipComment: This is a comment."])
+                                  "RelationshipComment: This is a comment.",
+                                  "Relationship: DocumentRef-extern:SPDXRef-Package DESCRIBES NONE"])

     lexer.input(relationship_str)
     token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 1)
     token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT DESCRIBES NONE", 1)
     token_assert_helper(lexer.token(), "RELATIONSHIP_COMMENT", "RelationshipComment", 2)
     token_assert_helper(lexer.token(), "LINE", "This is a comment.", 2)
+    token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 3)
+    token_assert_helper(lexer.token(), "LINE", "DocumentRef-extern:SPDXRef-Package DESCRIBES NONE", 3)

0 commit comments