[Issue 305] add new json parser by meretp · Pull Request #366 · spdx/tools-python · GitHub
[go: up one dir, main page]

Skip to content

[Issue 305] add new json parser #366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Dec 28, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
296816b
[issue-305] add new parser
meretp Nov 25, 2022
cd095e2
[issue-305, refactor] add method to construct an object and raise SPD…
meretp Dec 14, 2022
f2f91fd
[issue-305, refactor] annotation_parser: extract methods to improve r…
meretp Dec 14, 2022
190cd5a
[issue-305, refactor] add methods to parse required/ optional fields …
meretp Dec 14, 2022
2834000
[issue-305, refactor] relationship_parser: extract dict to invert rel…
meretp Dec 14, 2022
6297673
[issue-305, refactor] add method to raise error if logger has message…
meretp Dec 14, 2022
4741a43
[issue-305, review] refactor methods in dict_parsing_functions.py, sm…
meretp Dec 15, 2022
080d848
[issue-305, refactor] json_parser
meretp Dec 15, 2022
5826922
[issue-305, reformat]
meretp Dec 19, 2022
e6332cb
[issue-305] add testcases and update license_expression parser
meretp Dec 19, 2022
1f6d5b6
[issue-305, refactor] delete duplicated check for error type
meretp Dec 20, 2022
fc980b1
[issue-305, review] fix messages, naming, type hints
meretp Dec 21, 2022
3fe3e11
[issue-305, review] refactor relationship_parser
meretp Dec 21, 2022
0be1780
[issue-305, review] refactor snippet_parser
meretp Dec 21, 2022
c5b8d3c
[issue-305, review] make naming consistent
meretp Dec 21, 2022
03cce38
[issue-305, review] add test for dict parsing functions and catch Val…
meretp Dec 21, 2022
2dcd125
[issue-305, review] add None handling for required fields
meretp Dec 21, 2022
50c3038
[issue-305, review] make error messages consistent, add test for json…
meretp Dec 28, 2022
562f288
[issue-305, review] add tests, change test data, naming of tests and …
meretp Dec 22, 2022
a722036
[issue-305, review] add method to parse fields that can be SpdxNone o…
meretp Dec 22, 2022
c8851d8
[issue-305, review] refactor parse_field_or_log_error
meretp Dec 22, 2022
347051a
[issue-305, review] reformat, type hints, fix typos, error messages
meretp Dec 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[issue-305, refactor] add methods to parse required/ optional fields …
…with exception handling

Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Dec 28, 2022
commit 190cd5a4d81f48cb02ef9a97700bbcca0ff0629b
54 changes: 28 additions & 26 deletions src/parser/json/annotation_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from src.model.annotation import Annotation, AnnotationType
from src.parser.error import SPDXParsingError
from src.parser.json.actor_parser import ActorParser
from src.parser.json.dict_parsing_functions import datetime_from_str, try_construction_raise_parsing_error
from src.parser.json.dict_parsing_functions import datetime_from_str, try_construction_raise_parsing_error, \
try_parse_optional_field_append_logger_when_failing, try_parse_required_field_append_logger_when_failing
from src.parser.logger import Logger


Expand Down Expand Up @@ -78,21 +79,21 @@ def parse_annotations(self, annotations_dict_list: List[Dict], spdx_id: Optional
def parse_annotation(self, annotation: Dict, spdx_id: Optional[str] = None) -> Annotation:
logger = Logger()
spdx_id: str = annotation.get("SPDXID") or spdx_id
try:
annotation_type: Optional[AnnotationType] = self.parse_annotation_type(annotation.get("annotationType"))
except SPDXParsingError as err:
logger.append_all(err.get_messages())
annotation_type = None
try:
annotator: Optional[Actor] = self.actor_parser.parse_actor(annotation.get("annotator"))
except SPDXParsingError as err:
logger.append_all(err.get_messages())
annotator = None
try:
annotation_date: Optional[datetime] = datetime_from_str(annotation.get("annotationDate"))
except TypeError:
logger.append("ValueError while parsing annotationDate.")
annotation_date = None

annotation_type: Optional[AnnotationType] = try_parse_required_field_append_logger_when_failing(
logger=logger, field=annotation.get("annotationType"),
method_to_parse=self.parse_annotation_type)

annotator: Optional[Actor] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=annotation.get(
"annotator"),
method_to_parse=self.actor_parser.parse_actor)

annotation_date: Optional[datetime] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=annotation.get(
"annotationDate"),
method_to_parse=datetime_from_str)

annotation_comment: str = annotation.get("comment")
if logger.has_messages():
raise SPDXParsingError([f"Error while parsing annotation: {logger.get_messages()}"])
Expand All @@ -112,16 +113,17 @@ def parse_annotation_type(annotation_type: str) -> AnnotationType:

def parse_review(self, review_dict: Dict, spdx_id: str) -> Annotation:
logger = Logger()
try:
annotator: Optional[Actor] = self.actor_parser.parse_actor(review_dict.get("reviewer"))
except SPDXParsingError as err:
logger.append_all(err.get_messages())
annotator = None
try:
annotation_date: Optional[datetime] = datetime_from_str(review_dict.get("reviewDate"))
except TypeError:
logger.append("ValueError while parsing reviewDate.")
annotation_date = None

annotator: Optional[Actor] = try_parse_optional_field_append_logger_when_failing(logger=logger,
field=review_dict.get(
"reviewer"),
method_to_parse=self.actor_parser.parse_actor)

annotation_date: Optional[datetime] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=review_dict.get(
"reviewDate"),
method_to_parse=datetime_from_str)

annotation_type = AnnotationType.REVIEW
comment: str = review_dict.get("comment")
if logger.has_messages():
Expand Down
1 change: 0 additions & 1 deletion src/parser/json/checksum_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from typing import Dict, List

from src.model.checksum import Checksum, ChecksumAlgorithm
from src.model.typing.constructor_type_errors import ConstructorTypeErrors
from src.parser.error import SPDXParsingError
from src.parser.json.dict_parsing_functions import transform_json_str_to_enum_name, try_construction_raise_parsing_error
from src.parser.logger import Logger
Expand Down
69 changes: 33 additions & 36 deletions src/parser/json/creation_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
from src.parser.error import SPDXParsingError
from src.parser.json.actor_parser import ActorParser
from src.parser.json.checksum_parser import ChecksumParser
from src.parser.json.dict_parsing_functions import datetime_from_str, parse_optional_field, \
try_construction_raise_parsing_error
from src.parser.json.dict_parsing_functions import datetime_from_str, try_construction_raise_parsing_error, \
try_parse_optional_field_append_logger_when_failing, \
try_parse_required_field_append_logger_when_failing
from src.parser.logger import Logger


Expand All @@ -47,33 +48,28 @@ def parse_creation_info(self, doc_dict: Dict) -> CreationInfo:
if creation_info_dict is None:
logger.append("CreationInfo is not valid.")
raise SPDXParsingError([f"Error while parsing doc {name}: {logger.get_messages()}"])
try:
list_of_creators: List[str] = creation_info_dict.get("creators")
creators: List[Actor] = self.parse_creators(list_of_creators)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
creators = []
try:
created: Optional[datetime] = datetime_from_str(creation_info_dict.get("created"))
except ValueError:
logger.append("Error while parsing created")
created = None

list_of_creators: List[str] = creation_info_dict.get("creators")
creators: List[Actor] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=list_of_creators,
method_to_parse=self.parse_creators,
default=[])

created: Optional[datetime] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=creation_info_dict.get(
"created"),
method_to_parse=datetime_from_str)

creator_comment: Optional[str] = creation_info_dict.get("comment")
data_license: str = doc_dict.get("dataLicense")
try:
external_document_refs: List[ExternalDocumentRef] = parse_optional_field(
doc_dict.get("externalDocumentRefs"),
self.parse_external_document_refs)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
external_document_refs = []
try:
license_list_version: Optional[Version] = parse_optional_field(creation_info_dict.get("licenseListVersion"),
self.parse_version)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
license_list_version = None

external_document_refs: List[ExternalDocumentRef] = try_parse_optional_field_append_logger_when_failing(
logger=logger, field=doc_dict.get("externalDocumentRefs"),
method_to_parse=self.parse_external_document_refs)
license_list_version: Optional[Version] = try_parse_optional_field_append_logger_when_failing(logger=logger,
field=creation_info_dict.get(
"licenseListVersion"),
method_to_parse=self.parse_version)
document_comment: Optional[str] = doc_dict.get("comment")
if logger.has_messages():
raise SPDXParsingError([f"Error while parsing doc {name}: {logger.get_messages()}"])
Expand Down Expand Up @@ -114,22 +110,23 @@ def parse_external_document_refs(self, external_document_refs_dict: List[Dict])
logger = Logger()
external_document_refs = []
for external_ref_dict in external_document_refs_dict:
try:
external_doc_ref: ExternalDocumentRef = self.parse_external_doc_ref(external_ref_dict)
external_document_refs.append(external_doc_ref)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
external_doc_ref: ExternalDocumentRef = try_parse_optional_field_append_logger_when_failing(logger=logger,
field=external_ref_dict,
method_to_parse=self.parse_external_doc_ref)

external_document_refs.append(external_doc_ref)

if logger.has_messages():
raise SPDXParsingError(logger.get_messages())
return external_document_refs

def parse_external_doc_ref(self, external_doc_ref_dict: Dict) -> ExternalDocumentRef:
logger = Logger()
try:
checksum: Optional[Checksum] = self.checksum_parser.parse_checksum(external_doc_ref_dict.get("checksum"))
except SPDXParsingError as err:
logger.append_all(err.get_messages())
checksum = None
checksum: Optional[Checksum] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=external_doc_ref_dict.get(
"checksum"),
method_to_parse=self.checksum_parser.parse_checksum)

external_document_id: str = external_doc_ref_dict.get("externalDocumentId")
spdx_document: str = external_doc_ref_dict.get("spdxDocument")
if logger.has_messages():
Expand Down
30 changes: 25 additions & 5 deletions src/parser/json/dict_parsing_functions.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
from datetime import datetime
from typing import Any, Callable, Dict

from src.model.external_document_ref import ExternalDocumentRef
from src.model.typing.constructor_type_errors import ConstructorTypeErrors
from src.parser.error import SPDXParsingError
from src.parser.logger import Logger


def parse_optional_field(field: Any, method_to_parse:Callable=lambda x: x, default=None):
def parse_optional_field(field: Any, method_to_parse: Callable = lambda x: x, default=None):
if not field:
return default
return method_to_parse(field)


def datetime_from_str(created: str) -> datetime:
date = datetime.strptime(created, "%Y-%m-%dT%H:%M:%SZ")
def datetime_from_str(date_str: str) -> datetime:
if not isinstance(date_str, str):
raise SPDXParsingError([f"Could not convert str to datetime, invalid type: {type(date_str).__name__}"])
date = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ")
return date


def transform_json_str_to_enum_name(json_str: str) -> str:
return json_str.replace("-","_").upper()
return json_str.replace("-", "_").upper()


def try_construction_raise_parsing_error(object_to_construct: Any, args_for_construction: Dict) -> Any:
Expand All @@ -27,3 +29,21 @@ def try_construction_raise_parsing_error(object_to_construct: Any, args_for_cons
except ConstructorTypeErrors as err:
raise SPDXParsingError([f"Error while constructing {object_to_construct.__name__}: {err.get_messages()}"])
return constructed_object


def try_parse_optional_field_append_logger_when_failing(logger: Logger, field: Any, method_to_parse: Callable, default=None):
    """Parse an optional field, logging parsing errors instead of raising them.

    The work is delegated to ``parse_optional_field``, which returns
    ``default`` for a falsy ``field`` and otherwise returns
    ``method_to_parse(field)``.

    If an ``SPDXParsingError`` is raised while parsing, its messages are
    appended to ``logger`` and ``default`` is returned, so the caller can keep
    collecting errors from other fields.
    """
    try:
        return parse_optional_field(field=field, method_to_parse=method_to_parse, default=default)
    except SPDXParsingError as parsing_error:
        # Record the failure and fall back to the default instead of propagating.
        logger.append_all(parsing_error.get_messages())
        return default

def try_parse_required_field_append_logger_when_failing(logger: Logger, field: Any, method_to_parse: Callable, default=None):
try:
parsed_element = method_to_parse(field)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
parsed_element = default
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't we get another error down the line when using the None default as a value in some constructor?

Copy link
Collaborator Author
@meretp meretp Dec 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, before calling any constructor we check whether the logger has any messages and, if so, raise an SPDXParsingError.

return parsed_element

50 changes: 24 additions & 26 deletions src/parser/json/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from src.model.spdx_none import SpdxNone
from src.parser.error import SPDXParsingError
from src.parser.json.checksum_parser import ChecksumParser
from src.parser.json.dict_parsing_functions import parse_optional_field, try_construction_raise_parsing_error
from src.parser.json.dict_parsing_functions import try_construction_raise_parsing_error, \
try_parse_optional_field_append_logger_when_failing, try_parse_required_field_append_logger_when_failing
from src.parser.json.license_expression_parser import LicenseExpressionParser
from src.parser.logger import Logger

Expand Down Expand Up @@ -50,37 +51,34 @@ def parse_file(self, file_dict: Dict) -> Optional[File]:
name: str = file_dict.get("fileName")
spdx_id: str = file_dict.get("SPDXID")
checksums_list: List[Dict] = file_dict.get("checksums")
try:
checksums: List[Checksum] = self.checksum_parser.parse_checksums(checksums_list)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
checksums = []

checksums: List[Checksum] = try_parse_required_field_append_logger_when_failing(logger=logger,
field=checksums_list,
method_to_parse=self.checksum_parser.parse_checksums)

attribution_texts: Optional[str] = file_dict.get("attributionTexts")
comment: Optional[str] = file_dict.get("comment")
copyright_text: Optional[str] = file_dict.get("copyrightText")
file_contributors: List[str] = file_dict.get("fileContributors")
try:
file_types: List[FileType] = parse_optional_field(file_dict.get("fileTypes"), self.parse_file_types)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
file_types = []
file_types: List[FileType] = try_parse_optional_field_append_logger_when_failing(logger=logger,
field=file_dict.get(
"fileTypes"),
method_to_parse=self.parse_file_types)

license_comments: Optional[str] = file_dict.get("licenseComments")
try:
license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_optional_field(
file_dict.get("licenseConcluded"),
self.license_expression_parser.parse_license_expression)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
license_concluded = None
try:
license_info_in_files: Optional[
Union[List[LicenseExpression], SpdxNoAssertion, SpdxNone]] = parse_optional_field(
file_dict.get("licenseInfoInFiles"),
self.license_expression_parser.parse_license_expression)
except SPDXParsingError as err:
logger.append_all(err.get_messages())
license_info_in_files = None

license_concluded: Optional[Union[
LicenseExpression, SpdxNoAssertion, SpdxNone]] = try_parse_optional_field_append_logger_when_failing(
logger=logger, field=file_dict.get("licenseConcluded"),
method_to_parse=self.license_expression_parser.parse_license_expression)

license_info_in_files: Optional[
Union[List[
LicenseExpression], SpdxNoAssertion, SpdxNone]] = try_parse_optional_field_append_logger_when_failing(
logger=logger,
field=file_dict.get("licenseInfoInFiles"),
method_to_parse=self.license_expression_parser.parse_license_expression)

notice_text: Optional[str] = file_dict.get("noticeText")

if logger.has_messages():
Expand Down
Loading