Deduplicate top-level and CPython SBOM generation steps · python/release-tools@26eacab · GitHub
[go: up one dir, main page]

Skip to content

Commit 26eacab

Browse files
committed
Deduplicate top-level and CPython SBOM generation steps
1 parent 26b97e6 commit 26eacab

File tree

1 file changed

+54
-101
lines changed

1 file changed

+54
-101
lines changed

sbom.py

Lines changed: 54 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -316,38 +316,20 @@ def create_pip_sbom_from_wheel(
316316
)
317317

318318

319-
def create_sbom_for_source_tarball(tarball_path: str):
320-
"""Stitches together an SBOM for a source tarball"""
321-
tarball_name = os.path.basename(tarball_path)
322-
323-
# Open the tarball with known compression settings.
324-
if tarball_name.endswith(".tgz"):
325-
tarball = tarfile.open(tarball_path, mode="r:gz")
326-
elif tarball_name.endswith(".tar.xz"):
327-
tarball = tarfile.open(tarball_path, mode="r:xz")
328-
else:
329-
raise ValueError(f"Unknown tarball format: '{tarball_name}'")
319+
def create_cpython_sbom(
320+
sbom_data: dict[str, typing.Any],
321+
cpython_version: str,
322+
artifact_path: str,
323+
):
324+
"""Creates the top-level SBOM metadata and the CPython SBOM package."""
330325

331-
# Parse the CPython version from the tarball.
332-
# Calculate the download locations from the CPython version and tarball name.
333-
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)
334326
cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1)
335-
tarball_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{tarball_name}"
336-
337-
# Take a hash of the tarball
338-
with open(tarball_path, mode="rb") as f:
339-
tarball_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()
327+
artifact_name = os.path.basename(artifact_path)
328+
artifact_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{artifact_name}"
340329

341-
# There should be an SBOM included in the tarball.
342-
# If there's not we can't create an SBOM.
343-
try:
344-
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
345-
except KeyError:
346-
raise ValueError(
347-
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
348-
) from None
349-
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
350-
sbom_data = json.loads(sbom_bytes)
330+
# Take a hash of the artifact
331+
with open(artifact_path, mode="rb") as f:
332+
artifact_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()
351333

352334
sbom_data.update({
353335
"SPDXID": "SPDXRef-DOCUMENT",
@@ -356,7 +338,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
356338
"dataLicense": "CC0-1.0",
357339
# Naming done according to OpenSSF SBOM WG recommendations.
358340
# See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md
359-
"documentNamespace": f"{tarball_download_location}.spdx.json",
341+
"documentNamespace": f"{artifact_download_location}.spdx.json",
360342
"creationInfo": {
361343
"created": (
362344
datetime.datetime.now(tz=datetime.timezone.utc)
@@ -381,7 +363,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
381363
"licenseConcluded": "PSF-2.0",
382364
"originator": "Organization: Python Software Foundation",
383365
"supplier": "Organization: Python Software Foundation",
384-
"packageFileName": tarball_name,
366+
"packageFileName": artifact_name,
385367
"externalRefs": [
386368
{
387369
"referenceCategory": "SECURITY",
@@ -390,8 +372,8 @@ def create_sbom_for_source_tarball(tarball_path: str):
390372
}
391373
],
392374
"primaryPackagePurpose": "SOURCE",
393-
"downloadLocation": tarball_download_location,
394-
"checksums": [{"algorithm": "SHA256", "checksumValue": tarball_checksum_sha256}],
375+
"downloadLocation": artifact_download_location,
376+
"checksums": [{"algorithm": "SHA256", "checksumValue": artifact_checksum_sha256}],
395377
}
396378

397379
# The top-level CPython package depends on every vendored sub-package.
@@ -404,6 +386,37 @@ def create_sbom_for_source_tarball(tarball_path: str):
404386

405387
sbom_data["packages"].append(sbom_cpython_package)
406388

389+
390+
def create_sbom_for_source_tarball(tarball_path: str):
391+
"""Stitches together an SBOM for a source tarball"""
392+
tarball_name = os.path.basename(tarball_path)
393+
394+
# Open the tarball with known compression settings.
395+
if tarball_name.endswith(".tgz"):
396+
tarball = tarfile.open(tarball_path, mode="r:gz")
397+
elif tarball_name.endswith(".tar.xz"):
398+
tarball = tarfile.open(tarball_path, mode="r:xz")
399+
else:
400+
raise ValueError(f"Unknown tarball format: '{tarball_name}'")
401+
402+
# Parse the CPython version from the tarball.
403+
# Calculate the download locations from the CPython version and tarball name.
404+
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)
405+
406+
# There should be an SBOM included in the tarball.
407+
# If there's not we can't create an SBOM.
408+
try:
409+
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
410+
except KeyError:
411+
raise ValueError(
412+
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
413+
) from None
414+
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
415+
sbom_data = json.loads(sbom_bytes)
416+
417+
create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=tarball_path)
418+
sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")
419+
407420
# Find the pip wheel in ensurepip in the tarball
408421
for member in tarball.getmembers():
409422
match = re.match(rf"^Python-{cpython_version}/Lib/ensurepip/_bundled/(pip-.*\.whl)$", member.name)
@@ -487,7 +500,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
487500
)
488501
sbom_data["relationships"].append(
489502
{
490-
"spdxElementId": sbom_cpython_package["SPDXID"],
503+
"spdxElementId": sbom_cpython_package_spdx_id,
491504
"relatedSpdxElement": sbom_file_spdx_id,
492505
"relationshipType": "CONTAINS",
493506
}
@@ -505,7 +518,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
505518
sbom_data["relationships"].append(
506519
{
507520
"spdxElementId": "SPDXRef-DOCUMENT",
508-
"relatedSpdxElement": sbom_cpython_package["SPDXID"],
521+
"relatedSpdxElement": sbom_cpython_package_spdx_id,
509522
"relationshipType": "DESCRIBES",
510523
}
511524
)
@@ -519,20 +532,12 @@ def create_sbom_for_source_tarball(tarball_path: str):
519532
# Calculate the 'packageVerificationCode' values for files in packages.
520533
calculate_package_verification_codes(sbom_data)
521534

522-
# Normalize SBOM structures for reproducibility.
523-
normalize_sbom_data(sbom_data)
524-
525535
return sbom_data
526536

527537

528538
def create_sbom_for_windows_artifact(exe_path):
529539
exe_name = os.path.basename(exe_path)
530540
cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe)", exe_name).group(1)
531-
cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1)
532-
exe_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{exe_name}"
533-
534-
with open(exe_path, mode="rb") as f:
535-
exe_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()
536541

537542
# Start with the CPython source SBOM as a base
538543
with open("Misc/externals.spdx.json") as f:
@@ -549,80 +554,26 @@ def create_sbom_for_windows_artifact(exe_path):
549554
sbom_data["relationships"] = []
550555
sbom_data["files"] = []
551556

552-
sbom_data.update({
553-
"SPDXID": "SPDXRef-DOCUMENT",
554-
"spdxVersion": "SPDX-2.3",
555-
"name": "CPython SBOM",
556-
"dataLicense": "CC0-1.0",
557-
# Naming done according to OpenSSF SBOM WG recommendations.
558-
# See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md
559-
"documentNamespace": f"{exe_download_location}.spdx.json",
560-
"creationInfo": {
561-
"created": (
562-
datetime.datetime.now(tz=datetime.timezone.utc)
563-
.strftime("%Y-%m-%dT%H:%M:%SZ")
564-
),
565-
"creators": [
566-
"Person: Python Release Managers",
567-
f"Tool: ReleaseTools-{get_release_tools_commit_sha()}",
568-
],
569-
# Version of the SPDX License ID list.
570-
# This shouldn't need to be updated often, if ever.
571-
"licenseListVersion": "3.22",
572-
},
573-
})
574-
575-
# Create the SBOM entry for the CPython package. We use
576-
# the SPDXID later on for creating relationships to files.
577-
sbom_cpython_package = {
578-
"SPDXID": "SPDXRef-PACKAGE-cpython",
579-
"name": "CPython",
580-
"versionInfo": cpython_version,
581-
"licenseConcluded": "PSF-2.0",
582-
"originator": "Organization: Python Software Foundation",
583-
"supplier": "Organization: Python Software Foundation",
584-
"packageFileName": exe_name,
585-
"externalRefs": [
586-
{
587-
"referenceCategory": "SECURITY",
588-
"referenceLocator": f"cpe:2.3:a:python:python:{cpython_version}:*:*:*:*:*:*:*",
589-
"referenceType": "cpe23Type",
590-
}
591-
],
592-
"primaryPackagePurpose": "APPLICATION",
593-
"downloadLocation": exe_download_location,
594-
"checksums": [{"algorithm": "SHA256", "checksumValue": exe_checksum_sha256}],
595-
}
596-
597-
# The top-level CPython package depends on every vendored sub-package.
598-
for sbom_package in sbom_data["packages"]:
599-
sbom_data["relationships"].append({
600-
"spdxElementId": sbom_cpython_package["SPDXID"],
601-
"relatedSpdxElement": sbom_package["SPDXID"],
602-
"relationshipType": "DEPENDS_ON",
603-
})
604-
605-
sbom_data["packages"].append(sbom_cpython_package)
557+
create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=exe_path)
558+
sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")
606559

607560
# Final relationship, this SBOM describes the CPython package.
608561
sbom_data["relationships"].append(
609562
{
610563
"spdxElementId": "SPDXRef-DOCUMENT",
611-
"relatedSpdxElement": sbom_cpython_package["SPDXID"],
564+
"relatedSpdxElement": sbom_cpython_package_spdx_id,
612565
"relationshipType": "DESCRIBES",
613566
}
614567
)
615568

616569
# Apply the 'supplier' tag to every package since we're shipping
617-
# the package in the tarball itself. Originator field is used for maintainers.
570+
# the package in the artifact itself. Originator field is used for maintainers.
618571
for sbom_package in sbom_data["packages"]:
619572
sbom_package["supplier"] = "Organization: Python Software Foundation"
620573
# Source packages have been compiled.
621574
if sbom_package["primaryPackagePurpose"] == "SOURCE":
622575
sbom_package["primaryPackagePurpose"] = "LIBRARY"
623576

624-
normalize_sbom_data(sbom_data)
625-
626577
return sbom_data
627578

628579

@@ -634,6 +585,8 @@ def main() -> None:
634585
else:
635586
sbom_data = create_sbom_for_source_tarball(artifact_path)
636587

588+
# Normalize SBOM data for reproducibility.
589+
normalize_sbom_data(sbom_data)
637590
with open(artifact_path + ".spdx.json", mode="w") as f:
638591
f.truncate()
639592
f.write(json.dumps(sbom_data, indent=2, sort_keys=True))

0 commit comments

Comments
 (0)
0