Create utility for generating SBOM from artifacts by sethmlarson · Pull Request #82 · python/release-tools · GitHub
[go: up one dir, main page]

Skip to content

Create utility for generating SBOM from artifacts #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 6, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Addresses external reviewer feedback
* Adds dependency relationships between top-level
  CPython package and vendored packages.
* Removes directory prefix in file names to make diffs
  more consistent across different releases.
* Gets release-tools commit SHA for tool version
  • Loading branch information
sethmlarson committed Jan 16, 2024
commit 9b1fdfa9605a9e71ff32333cb2e71ad3304ceed6
50 changes: 31 additions & 19 deletions sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
import os
import re
import subprocess
import sys
import tarfile

Expand All @@ -19,7 +20,7 @@ def spdx_id(value: str) -> str:

def calculate_package_verification_codes(sbom) -> None:
"""
Calculate SPDX 'PackageVerificationCode' values for
Calculate SPDX 'packageVerificationCode' values for
each package with 'filesAnalyzed' set to 'true'.
Mutates the values within the passed structure.

Expand Down Expand Up @@ -90,6 +91,14 @@ def calculate_package_verification_codes(sbom) -> None:
}


def get_release_tools_commit_sha() -> str:
    """Return the git commit SHA of the checkout that contains this file.

    The SHA is resolved by running ``git rev-parse HEAD`` with the working
    directory set to the directory holding this module, so the result does
    not depend on where the calling process happened to be started.

    Returns:
        The hexadecimal commit SHA (40 or more lowercase hex characters)
        with no surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: If ``git`` exits with a non-zero status.
        ValueError: If the output of ``git`` does not look like a commit SHA.
    """
    repo_dir = os.path.abspath(os.path.dirname(__file__))

    # 'cwd' is what selects the repository. 'git rev-parse --prefix <dir>'
    # only changes how relative paths are resolved; the repository would
    # still be discovered from the caller's working directory.
    stdout = subprocess.check_output(
        ["git", "rev-parse", "HEAD"],
        cwd=repo_dir,
    )

    # git terminates its output with a newline; strip it so the value can be
    # embedded directly in other strings.
    commit_sha = stdout.decode("ascii").strip()

    # Validate explicitly instead of using 'assert', which is skipped when
    # Python runs with -O. fullmatch() also rejects a trailing newline,
    # which '$' with match() would let through.
    if not re.fullmatch(r"[a-f0-9]{40,}", commit_sha):
        raise ValueError(
            f"Unexpected output from 'git rev-parse HEAD': {commit_sha!r}"
        )
    return commit_sha


def create_sbom_for_source_tarball(tarball_path: str):
"""Stitches together an SBOM for a source tarball"""
tarball_name = os.path.basename(tarball_path)
Expand All @@ -114,7 +123,8 @@ def create_sbom_for_source_tarball(tarball_path: str):

# There should be an SBOM included in the tarball.
# If there's not we can't create an SBOM.
sbom_bytes = tarball.extractfile(tarball.getmember("Misc/sbom.spdx.json")).read()
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()

sbom = json.loads(sbom_bytes)
sbom.update({
Expand All @@ -132,7 +142,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
),
"creators": [
"Person: Python Release Managers",
"Tool: python/release-tools@f58cfa6611dd13f2fb4e4790a8c54f06dddab6bc",
f"Tool: ReleaseTools-{get_release_tools_commit_sha()}",
],
# Version of the SPDX License ID list.
# This shouldn't need to be updated often, if ever.
Expand Down Expand Up @@ -161,26 +171,26 @@ def create_sbom_for_source_tarball(tarball_path: str):
"downloadLocation": tarball_download_location,
"checksums": [{"algorithm": "SHA256", "checksumValue": tarball_checksum_sha256}],
}

# The top-level CPython package depends on every vendored sub-package.
for sbom_package in sbom["packages"]:
sbom["relationships"].append({
"spdxElementId": sbom_cpython_package["SPDXID"],
"relatedSpdxElement": sbom_package["SPDXID"],
"relationshipType": "DEPENDS_ON",
})

sbom["packages"].append(sbom_cpython_package)

# Extract all currently known files from the SBOM with their checksums.
known_sbom_files = {}
for sbom_file in sbom["files"]:
sbom_filename = sbom_file["fileName"]

# We use the name we're expecting in the tarball here
# which is to prefix the name with 'Python-{version}/...'.
expected_tar_filename = f"Python-{cpython_version}/{sbom_filename}"

# We also want to update our SBOM to use the same filenames
# as the ones in the tarball. We maintain the SPDXIDs though
# to not need to rewrite SBOM relationships.
sbom_file["fileName"] = expected_tar_filename

# Look for the expected SHA256 checksum.
for sbom_file_checksum in sbom_file["checksums"]:
if sbom_file_checksum["algorithm"] == "SHA256":
known_sbom_files[expected_tar_filename] = (
known_sbom_files[sbom_filename] = (
sbom_file_checksum["checksumValue"]
)
break
Expand All @@ -206,21 +216,23 @@ def create_sbom_for_source_tarball(tarball_path: str):
actual_file_checksum_sha1 = hashlib.sha1(file_bytes).hexdigest()
actual_file_checksum_sha256 = hashlib.sha256(file_bytes).hexdigest()

# Remove the 'Python-{version}/...' prefix for the SPDXID and fileName.
member_name_no_prefix = member.name.split('/', 1)[1]

# We've already seen this file, so we check it hasn't been modified and continue on.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tar files can contain the same file multiple times. There isn't a situation where this can lead to false positives here, is there? (I'm thinking of the listed SHA-256 being the same but other information differing in the tarball-contained SBOM.) I guess we're trusting both the tarball and the contained SBOM anyway, so no.

if member.name in known_sbom_files:
if member_name_no_prefix in known_sbom_files:
# If there's a hash mismatch we raise an error, something isn't right!
expected_file_checksum_sha256 = known_sbom_files.pop(member.name)
expected_file_checksum_sha256 = known_sbom_files.pop(member_name_no_prefix)
if expected_file_checksum_sha256 != actual_file_checksum_sha256:
raise ValueError(f"Mismatched checksum for file '{member.name}'")
raise ValueError(f"Mismatched checksum for file '{member_name_no_prefix}'")

# If this is a new file, then it's a part of the 'CPython' SBOM package.
else:
# Remove the 'Python-{version}/...' prefix for the SPDXID.
sbom_file_spdx_id = spdx_id(f"SPDXRef-FILE-{member.name.split('/', 1)[1]}")
sbom_file_spdx_id = spdx_id(f"SPDXRef-FILE-{member_name_no_prefix}")
sbom["files"].append(
{
"SPDXID": sbom_file_spdx_id,
"fileName": member.name,
"fileName": member_name_no_prefix,
"checksums": [
{
"algorithm": "SHA1",
Expand Down
0