Add SBOMs generation for Windows artifacts · python/release-tools@e945180 · GitHub
[go: up one dir, main page]

Skip to content

Commit e945180

Browse files
authored
Add SBOMs generation for Windows artifacts
1 parent 1ee177c commit e945180

File tree

5 files changed

+280
-41
lines changed

5 files changed

+280
-41
lines changed

sbom.py

Lines changed: 143 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
1111
"""
1212

13+
import argparse
1314
import datetime
1415
import hashlib
1516
import io
1617
import json
1718
import os
19+
import pathlib
1820
import re
1921
import subprocess
2022
import sys
@@ -316,38 +318,20 @@ def create_pip_sbom_from_wheel(
316318
)
317319

318320

319-
def create_sbom_for_source_tarball(tarball_path: str):
320-
"""Stitches together an SBOM for a source tarball"""
321-
tarball_name = os.path.basename(tarball_path)
322-
323-
# Open the tarball with known compression settings.
324-
if tarball_name.endswith(".tgz"):
325-
tarball = tarfile.open(tarball_path, mode="r:gz")
326-
elif tarball_name.endswith(".tar.xz"):
327-
tarball = tarfile.open(tarball_path, mode="r:xz")
328-
else:
329-
raise ValueError(f"Unknown tarball format: '{tarball_name}'")
321+
def create_cpython_sbom(
322+
sbom_data: dict[str, typing.Any],
323+
cpython_version: str,
324+
artifact_path: str,
325+
):
326+
"""Creates the top-level SBOM metadata and the CPython SBOM package."""
330327

331-
# Parse the CPython version from the tarball.
332-
# Calculate the download locations from the CPython version and tarball name.
333-
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)
334328
cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1)
335-
tarball_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{tarball_name}"
329+
artifact_name = os.path.basename(artifact_path)
330+
artifact_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{artifact_name}"
336331

337-
# Take a hash of the tarball
338-
with open(tarball_path, mode="rb") as f:
339-
tarball_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()
340-
341-
# There should be an SBOM included in the tarball.
342-
# If there's not we can't create an SBOM.
343-
try:
344-
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
345-
except KeyError:
346-
raise ValueError(
347-
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
348-
) from None
349-
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
350-
sbom_data = json.loads(sbom_bytes)
332+
# Take a hash of the artifact
333+
with open(artifact_path, mode="rb") as f:
334+
artifact_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()
351335

352336
sbom_data.update({
353337
"SPDXID": "SPDXRef-DOCUMENT",
@@ -356,7 +340,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
356340
"dataLicense": "CC0-1.0",
357341
# Naming done according to OpenSSF SBOM WG recommendations.
358342
# See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md
359-
"documentNamespace": f"{tarball_download_location}.spdx.json",
343+
"documentNamespace": f"{artifact_download_location}.spdx.json",
360344
"creationInfo": {
361345
"created": (
362346
datetime.datetime.now(tz=datetime.timezone.utc)
@@ -381,7 +365,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
381365
"licenseConcluded": "PSF-2.0",
382366
"originator": "Organization: Python Software Foundation",
383367
"supplier": "Organization: Python Software Foundation",
384-
"packageFileName": tarball_name,
368+
"packageFileName": artifact_name,
385369
"externalRefs": [
386370
{
387371
"referenceCategory": "SECURITY",
@@ -390,8 +374,8 @@ def create_sbom_for_source_tarball(tarball_path: str):
390374
}
391375
],
392376
"primaryPackagePurpose": "SOURCE",
393-
"downloadLocation": tarball_download_location,
394-
"checksums": [{"algorithm": "SHA256", "checksumValue": tarball_checksum_sha256}],
377+
"downloadLocation": artifact_download_location,
378+
"checksums": [{"algorithm": "SHA256", "checksumValue": artifact_checksum_sha256}],
395379
}
396380

397381
# The top-level CPython package depends on every vendored sub-package.
@@ -404,6 +388,37 @@ def create_sbom_for_source_tarball(tarball_path: str):
404388

405389
sbom_data["packages"].append(sbom_cpython_package)
406390

391+
392+
def create_sbom_for_source_tarball(tarball_path: str):
393+
"""Stitches together an SBOM for a source tarball"""
394+
tarball_name = os.path.basename(tarball_path)
395+
396+
# Open the tarball with known compression settings.
397+
if tarball_name.endswith(".tgz"):
398+
tarball = tarfile.open(tarball_path, mode="r:gz")
399+
elif tarball_name.endswith(".tar.xz"):
400+
tarball = tarfile.open(tarball_path, mode="r:xz")
401+
else:
402+
raise ValueError(f"Unknown tarball format: '{tarball_name}'")
403+
404+
# Parse the CPython version from the tarball.
405+
# Calculate the download locations from the CPython version and tarball name.
406+
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)
407+
408+
# There should be an SBOM included in the tarball.
409+
# If there's not we can't create an SBOM.
410+
try:
411+
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
412+
except KeyError:
413+
raise ValueError(
414+
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
415+
) from None
416+
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
417+
sbom_data = json.loads(sbom_bytes)
418+
419+
create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=tarball_path)
420+
sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")
421+
407422
# Find the pip wheel in ensurepip in the tarball
408423
for member in tarball.getmembers():
409424
match = re.match(rf"^Python-{cpython_version}/Lib/ensurepip/_bundled/(pip-.*\.whl)$", member.name)
@@ -487,7 +502,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
487502
)
488503
sbom_data["relationships"].append(
489504
{
490-
"spdxElementId": sbom_cpython_package["SPDXID"],
505+
"spdxElementId": sbom_cpython_package_spdx_id,
491506
"relatedSpdxElement": sbom_file_spdx_id,
492507
"relationshipType": "CONTAINS",
493508
}
@@ -505,7 +520,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
505520
sbom_data["relationships"].append(
506521
{
507522
"spdxElementId": "SPDXRef-DOCUMENT",
508-
"relatedSpdxElement": sbom_cpython_package["SPDXID"],
523+
"relatedSpdxElement": sbom_cpython_package_spdx_id,
509524
"relationshipType": "DESCRIBES",
510525
}
511526
)
@@ -519,16 +534,104 @@ def create_sbom_for_source_tarball(tarball_path: str):
519534
# Calculate the 'packageVerificationCode' values for files in packages.
520535
calculate_package_verification_codes(sbom_data)
521536

522-
# Normalize SBOM structures for reproducibility.
523-
normalize_sbom_data(sbom_data)
537+
return sbom_data
538+
539+
540+
def create_sbom_for_windows_artifact(
    artifact_path: str, cpython_source_dir: str
) -> dict[str, typing.Any]:
    """Creates an SBOM for a Windows artifact ('.exe' installer or '.zip' embed).

    The SBOM is assembled from two documents inside the CPython source tree:
    'Misc/externals.spdx.json' is used as the base document and the packages
    of 'Misc/sbom.spdx.json' are appended to it. File entries and
    relationships are reset to empty lists because the source files are not
    available in Windows artifacts.

    Args:
        artifact_path: Path to the artifact. The file name must look like
            'python-<version>' followed by '-', '.exe' or '.zip'.
        cpython_source_dir: Path to the CPython source checkout the artifact
            was built from (the '--cpython-source-dir' option).

    Returns:
        The SBOM document as a dictionary. It is not normalized here.

    Raises:
        ValueError: If the CPython version can't be parsed from the artifact
            name, if 'cpython_source_dir' is empty or None, or if an '.exe'
            artifact is requested and no bundled pip wheel can be found.
    """
    artifact_name = os.path.basename(artifact_path)
    version_match = re.match(
        r"^python-([0-9abrc.]+)(?:-|\.exe|\.zip)", artifact_name
    )
    if version_match is None:
        # Fail with a clear message instead of an AttributeError on 'None'.
        raise ValueError(
            f"Could not parse CPython version from artifact name: '{artifact_name}'"
        )
    cpython_version = version_match.group(1)

    if not cpython_source_dir:
        raise ValueError("Must specify --cpython-source-dir for Windows artifacts")
    cpython_source_dir = pathlib.Path(cpython_source_dir)

    # Start with the CPython source SBOM as a base.
    # Read as UTF-8 explicitly: this runs on Windows build agents where the
    # default text encoding is locale-dependent.
    with (cpython_source_dir / "Misc/externals.spdx.json").open(encoding="utf-8") as f:
        sbom_data = json.load(f)

    sbom_data["relationships"] = []
    sbom_data["files"] = []

    # Add all the packages from the source SBOM
    # We want to skip the file information because
    # the files aren't available in Windows artifacts.
    with (cpython_source_dir / "Misc/sbom.spdx.json").open(encoding="utf-8") as f:
        source_sbom_data = json.load(f)
    sbom_data["packages"].extend(source_sbom_data["packages"])

    create_cpython_sbom(
        sbom_data,
        cpython_version=cpython_version,
        artifact_path=artifact_path,
    )
    sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")

    # The Windows embed artifacts don't contain pip/ensurepip,
    # but the MSI artifacts do. Add pip for MSI installers.
    if artifact_name.endswith(".exe"):
        bundled_dir = cpython_source_dir / "Lib/ensurepip/_bundled"

        # Find the pip wheel in ensurepip in the source code.
        # Sorted so the choice doesn't depend on directory listing order.
        for pathname in sorted(os.listdir(bundled_dir)):
            if pathname.startswith("pip-") and pathname.endswith(".whl"):
                pip_wheel_filename = pathname
                pip_wheel_bytes = (bundled_dir / pathname).read_bytes()
                break
        else:
            raise ValueError("Could not find pip wheel in 'Lib/ensurepip/_bundled/...'")

        create_pip_sbom_from_wheel(
            sbom_data,
            pip_wheel_filename=pip_wheel_filename,
            pip_wheel_bytes=pip_wheel_bytes,
        )

    # Final relationship, this SBOM describes the CPython package.
    sbom_data["relationships"].append(
        {
            "spdxElementId": "SPDXRef-DOCUMENT",
            "relatedSpdxElement": sbom_cpython_package_spdx_id,
            "relationshipType": "DESCRIBES",
        }
    )

    # Apply the 'supplier' tag to every package since we're shipping
    # the package in the artifact itself. Originator field is used for maintainers.
    for sbom_package in sbom_data["packages"]:
        sbom_package["supplier"] = "Organization: Python Software Foundation"
        # Source packages have been compiled.
        # 'primaryPackagePurpose' is optional in SPDX, so don't assume it's set.
        if sbom_package.get("primaryPackagePurpose") == "SOURCE":
            sbom_package["primaryPackagePurpose"] = "LIBRARY"

    return sbom_data
526607

527608

528609
def main() -> None:
    """Generates an '<artifact>.spdx.json' SBOM next to every given artifact.

    Command line: one or more artifact paths, plus an optional
    '--cpython-source-dir' which is forwarded to the Windows artifact code
    path. Paths ending in '.exe' or '.zip' are treated as Windows artifacts,
    everything else is treated as a source tarball.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpython-source-dir", default=None)
    parser.add_argument("artifacts", nargs="+")
    parsed_args = parser.parse_args(sys.argv[1:])

    cpython_source_dir = parsed_args.cpython_source_dir

    for artifact_path in parsed_args.artifacts:
        # Windows MSI and Embed artifacts
        if artifact_path.endswith((".exe", ".zip")):
            sbom_data = create_sbom_for_windows_artifact(
                artifact_path,
                cpython_source_dir=cpython_source_dir,
            )
        # Source artifacts
        else:
            sbom_data = create_sbom_for_source_tarball(artifact_path)

        # Normalize SBOM data for reproducibility.
        normalize_sbom_data(sbom_data)

        # Mode "w" already truncates an existing file, no explicit truncate needed.
        with open(artifact_path + ".spdx.json", mode="w", encoding="utf-8") as f:
            f.write(json.dumps(sbom_data, indent=2, sort_keys=True))
532635

533636
if __name__ == "__main__":
534637
main()

tests/fake-artifact.txt

Whitespace-only changes.

tests/test_sbom.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import pathlib
12
import json
23
import random
34
import hashlib
5+
import re
46
import unittest.mock
57

68
import pytest
@@ -65,7 +67,6 @@ def test_normalization():
6567

6668

6769
def test_fetch_project_metadata_from_pypi(mocker):
68-
6970
mock_urlopen = mocker.patch("sbom.urlopen")
7071
mock_urlopen.return_value = unittest.mock.Mock()
7172

@@ -116,3 +117,81 @@ def test_fetch_project_metadata_from_pypi(mocker):
116117

117118
assert download_url == "https://files.pythonhosted.org/packages/.../pip-24.0.tar.gz"
118119
assert checksum_sha256 == "ea9bd1a847e8c5774a5777bb398c19e80bcd4e2aa16a4b301b718fe6f593aba2"
120+
121+
122+
def test_create_cpython_sbom():
    """Checks the document metadata and CPython package added by create_cpython_sbom()."""
    fake_artifact = pathlib.Path(__file__).parent / "fake-artifact.txt"
    sbom_data = {"packages": []}

    sbom.create_cpython_sbom(
        sbom_data, cpython_version="3.13.0", artifact_path=str(fake_artifact)
    )

    # The timestamp and the tool creator vary between runs, so they are
    # pattern-checked and removed before comparing the rest exactly.
    creation_info = sbom_data["creationInfo"]
    created = creation_info.pop("created")
    assert re.fullmatch(
        r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
        created,
    )
    tool_creator = creation_info["creators"].pop(1)
    assert re.fullmatch(
        r"^Tool: ReleaseTools-[a-f0-9]+$",
        tool_creator,
    )

    expected_cpe_ref = {
        "referenceCategory": "SECURITY",
        "referenceLocator": "cpe:2.3:a:python:python:3.13.0:*:*:*:*:*:*:*",
        "referenceType": "cpe23Type",
    }
    expected_checksum = {
        "algorithm": "SHA256",
        "checksumValue": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    }
    expected_package = {
        "SPDXID": "SPDXRef-PACKAGE-cpython",
        "name": "CPython",
        "versionInfo": "3.13.0",
        "licenseConcluded": "PSF-2.0",
        "originator": "Organization: Python Software Foundation",
        "supplier": "Organization: Python Software Foundation",
        "packageFileName": "fake-artifact.txt",
        "externalRefs": [expected_cpe_ref],
        "primaryPackagePurpose": "SOURCE",
        "downloadLocation": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt",
        "checksums": [expected_checksum],
    }
    expected_document = {
        "packages": [expected_package],
        "SPDXID": "SPDXRef-DOCUMENT",
        "spdxVersion": "SPDX-2.3",
        "name": "CPython SBOM",
        "dataLicense": "CC0-1.0",
        "documentNamespace": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt.spdx.json",
        "creationInfo": {
            "creators": [
                "Person: Python Release Managers",
            ],
            "licenseListVersion": "3.22",
        },
    }

    assert sbom_data == expected_document
178+
179+
180+
@pytest.mark.parametrize(
    ["cpython_version", "download_location"],
    [
        ("3.13.0", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"),
        ("3.11.0a1", "https://www.python.org/ftp/python/3.11.0/fake-artifact.txt"),
        ("3.12.0b2", "https://www.python.org/ftp/python/3.12.0/fake-artifact.txt"),
        ("3.13.0rc3", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"),
    ],
)
def test_create_cpython_sbom_pre_release_download_location(cpython_version, download_location):
    """Checks that pre-release suffixes are dropped from the download directory."""
    fake_artifact = pathlib.Path(__file__).parent / "fake-artifact.txt"
    sbom_data = {"packages": []}

    sbom.create_cpython_sbom(
        sbom_data,
        cpython_version=cpython_version,
        artifact_path=str(fake_artifact),
    )

    cpython_package = sbom_data["packages"][0]
    assert cpython_package["downloadLocation"] == download_location

windows-release/msi-steps.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,35 @@ steps:
111111
*.cab
112112
*.exe
113113
114+
- powershell: |
115+
git clone $(Build.Repository.Uri) -b $(Build.SourceBranchName) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools"
116+
git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion)
117+
displayName: 'Clone the python/release-tools repository'
118+
119+
- powershell: >
120+
& $(Python)
121+
"$(Pipeline.Workspace)\release-tools\sbom.py"
122+
"--cpython-source-dir=$(Build.SourcesDirectory)"
123+
$(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe")
124+
workingDirectory: $(Build.BinariesDirectory)
125+
displayName: 'Create SBOMs for binaries'
126+
127+
- task: CopyFiles@2
128+
displayName: 'Layout Artifact: sbom'
129+
inputs:
130+
sourceFolder: $(Build.ArtifactStagingDirectory)\msi
131+
targetFolder: $(Build.ArtifactStagingDirectory)\sbom
132+
flatten: true
133+
flattenFolders: true
134+
contents: |
135+
**\*.spdx.json
136+
114137
- publish: '$(Build.ArtifactStagingDirectory)\msi'
115138
artifact: msi
116139
displayName: 'Publish MSI'
140+
141+
- task: PublishBuildArtifacts@1
142+
displayName: 'Publish Artifact: sbom'
143+
inputs:
144+
PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom'
145+
ArtifactName: sbom

0 commit comments

Comments
 (0)
0