10
10
11
11
"""
12
12
13
+ import argparse
13
14
import datetime
14
15
import hashlib
15
16
import io
16
17
import json
17
18
import os
19
+ import pathlib
18
20
import re
19
21
import subprocess
20
22
import sys
@@ -316,38 +318,20 @@ def create_pip_sbom_from_wheel(
316
318
)
317
319
318
320
319
- def create_sbom_for_source_tarball (tarball_path : str ):
320
- """Stitches together an SBOM for a source tarball"""
321
- tarball_name = os .path .basename (tarball_path )
322
-
323
- # Open the tarball with known compression settings.
324
- if tarball_name .endswith (".tgz" ):
325
- tarball = tarfile .open (tarball_path , mode = "r:gz" )
326
- elif tarball_name .endswith (".tar.xz" ):
327
- tarball = tarfile .open (tarball_path , mode = "r:xz" )
328
- else :
329
- raise ValueError (f"Unknown tarball format: '{ tarball_name } '" )
321
+ def create_cpython_sbom (
322
+ sbom_data : dict [str , typing .Any ],
323
+ cpython_version : str ,
324
+ artifact_path : str ,
325
+ ):
326
+ """Creates the top-level SBOM metadata and the CPython SBOM package."""
330
327
331
- # Parse the CPython version from the tarball.
332
- # Calculate the download locations from the CPython version and tarball name.
333
- cpython_version = re .match (r"^Python-([0-9abrc.]+)\.t" , tarball_name ).group (1 )
334
328
cpython_version_without_suffix = re .match (r"^([0-9.]+)" , cpython_version ).group (1 )
335
- tarball_download_location = f"https://www.python.org/ftp/python/{ cpython_version_without_suffix } /{ tarball_name } "
329
+ artifact_name = os .path .basename (artifact_path )
330
+ artifact_download_location = f"https://www.python.org/ftp/python/{ cpython_version_without_suffix } /{ artifact_name } "
336
331
337
- # Take a hash of the tarball
338
- with open (tarball_path , mode = "rb" ) as f :
339
- tarball_checksum_sha256 = hashlib .sha256 (f .read ()).hexdigest ()
340
-
341
- # There should be an SBOM included in the tarball.
342
- # If there's not we can't create an SBOM.
343
- try :
344
- sbom_tarball_member = tarball .getmember (f"Python-{ cpython_version } /Misc/sbom.spdx.json" )
345
- except KeyError :
346
- raise ValueError (
347
- "Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
348
- ) from None
349
- sbom_bytes = tarball .extractfile (sbom_tarball_member ).read ()
350
- sbom_data = json .loads (sbom_bytes )
332
+ # Take a hash of the artifact
333
+ with open (artifact_path , mode = "rb" ) as f :
334
+ artifact_checksum_sha256 = hashlib .sha256 (f .read ()).hexdigest ()
351
335
352
336
sbom_data .update ({
353
337
"SPDXID" : "SPDXRef-DOCUMENT" ,
@@ -356,7 +340,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
356
340
"dataLicense" : "CC0-1.0" ,
357
341
# Naming done according to OpenSSF SBOM WG recommendations.
358
342
# See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md
359
- "documentNamespace" : f"{ tarball_download_location } .spdx.json" ,
343
+ "documentNamespace" : f"{ artifact_download_location } .spdx.json" ,
360
344
"creationInfo" : {
361
345
"created" : (
362
346
datetime .datetime .now (tz = datetime .timezone .utc )
@@ -381,7 +365,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
381
365
"licenseConcluded" : "PSF-2.0" ,
382
366
"originator" : "Organization: Python Software Foundation" ,
383
367
"supplier" : "Organization: Python Software Foundation" ,
384
- "packageFileName" : tarball_name ,
368
+ "packageFileName" : artifact_name ,
385
369
"externalRefs" : [
386
370
{
387
371
"referenceCategory" : "SECURITY" ,
@@ -390,8 +374,8 @@ def create_sbom_for_source_tarball(tarball_path: str):
390
374
}
391
375
],
392
376
"primaryPackagePurpose" : "SOURCE" ,
393
- "downloadLocation" : tarball_download_location ,
394
- "checksums" : [{"algorithm" : "SHA256" , "checksumValue" : tarball_checksum_sha256 }],
377
+ "downloadLocation" : artifact_download_location ,
378
+ "checksums" : [{"algorithm" : "SHA256" , "checksumValue" : artifact_checksum_sha256 }],
395
379
}
396
380
397
381
# The top-level CPython package depends on every vendored sub-package.
@@ -404,6 +388,37 @@ def create_sbom_for_source_tarball(tarball_path: str):
404
388
405
389
sbom_data ["packages" ].append (sbom_cpython_package )
406
390
391
+
392
+ def create_sbom_for_source_tarball (tarball_path : str ):
393
+ """Stitches together an SBOM for a source tarball"""
394
+ tarball_name = os .path .basename (tarball_path )
395
+
396
+ # Open the tarball with known compression settings.
397
+ if tarball_name .endswith (".tgz" ):
398
+ tarball = tarfile .open (tarball_path , mode = "r:gz" )
399
+ elif tarball_name .endswith (".tar.xz" ):
400
+ tarball = tarfile .open (tarball_path , mode = "r:xz" )
401
+ else :
402
+ raise ValueError (f"Unknown tarball format: '{ tarball_name } '" )
403
+
404
+ # Parse the CPython version from the tarball.
405
+ # Calculate the download locations from the CPython version and tarball name.
406
+ cpython_version = re .match (r"^Python-([0-9abrc.]+)\.t" , tarball_name ).group (1 )
407
+
408
+ # There should be an SBOM included in the tarball.
409
+ # If there's not we can't create an SBOM.
410
+ try :
411
+ sbom_tarball_member = tarball .getmember (f"Python-{ cpython_version } /Misc/sbom.spdx.json" )
412
+ except KeyError :
413
+ raise ValueError (
414
+ "Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
415
+ ) from None
416
+ sbom_bytes = tarball .extractfile (sbom_tarball_member ).read ()
417
+ sbom_data = json .loads (sbom_bytes )
418
+
419
+ create_cpython_sbom (sbom_data , cpython_version = cpython_version , artifact_path = tarball_path )
420
+ sbom_cpython_package_spdx_id = spdx_id ("SPDXRef-PACKAGE-cpython" )
421
+
407
422
# Find the pip wheel in ensurepip in the tarball
408
423
for member in tarball .getmembers ():
409
424
match = re .match (rf"^Python-{ cpython_version } /Lib/ensurepip/_bundled/(pip-.*\.whl)$" , member .name )
@@ -487,7 +502,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
487
502
)
488
503
sbom_data ["relationships" ].append (
489
504
{
490
- "spdxElementId" : sbom_cpython_package [ "SPDXID" ] ,
505
+ "spdxElementId" : sbom_cpython_package_spdx_id ,
491
506
"relatedSpdxElement" : sbom_file_spdx_id ,
492
507
"relationshipType" : "CONTAINS" ,
493
508
}
@@ -505,7 +520,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
505
520
sbom_data ["relationships" ].append (
506
521
{
507
522
"spdxElementId" : "SPDXRef-DOCUMENT" ,
508
- "relatedSpdxElement" : sbom_cpython_package [ "SPDXID" ] ,
523
+ "relatedSpdxElement" : sbom_cpython_package_spdx_id ,
509
524
"relationshipType" : "DESCRIBES" ,
510
525
}
511
526
)
@@ -519,16 +534,104 @@ def create_sbom_for_source_tarball(tarball_path: str):
519
534
# Calculate the 'packageVerificationCode' values for files in packages.
520
535
calculate_package_verification_codes (sbom_data )
521
536
522
- # Normalize SBOM structures for reproducibility.
523
- normalize_sbom_data (sbom_data )
537
+ return sbom_data
538
+
539
+
540
def create_sbom_for_windows_artifact(artifact_path: str, cpython_source_dir: str):
    """Build the SBOM document for a Windows '.exe' or '.zip' artifact.

    The document starts from 'Misc/externals.spdx.json' in the CPython
    source tree and gains every package listed in 'Misc/sbom.spdx.json'.
    File entries and relationships are reset because the source files are
    not available inside Windows artifacts. The top-level CPython package
    is then added and, for '.exe' artifacts only, the pip wheel bundled
    with ensurepip.

    Args:
        artifact_path: Path to the Windows artifact. The CPython version
            is parsed from its file name ('python-<version>...').
        cpython_source_dir: Path to the CPython source tree that the
            artifact was built from.

    Returns:
        The SBOM document as a dict. It is not normalized here.

    Raises:
        ValueError: If the version can't be parsed from the artifact name,
            if 'cpython_source_dir' is empty, or if no pip wheel is found
            for an '.exe' artifact.
    """
    artifact_name = os.path.basename(artifact_path)
    version_match = re.match(
        r"^python-([0-9abrc.]+)(?:-|\.exe|\.zip)", artifact_name
    )
    # Fail with a clear message rather than an AttributeError on None.
    if version_match is None:
        raise ValueError(
            f"Could not parse CPython version from Windows artifact name: '{artifact_name}'"
        )
    cpython_version = version_match.group(1)

    if not cpython_source_dir:
        raise ValueError("Must specify --cpython-source-dir for Windows artifacts")
    source_dir = pathlib.Path(cpython_source_dir)

    # Start with the externals SBOM from the CPython source tree as a base.
    # JSON is read as UTF-8 explicitly so the result doesn't depend on the
    # locale of the machine running the release.
    sbom_data = json.loads(
        (source_dir / "Misc/externals.spdx.json").read_text(encoding="utf-8")
    )

    sbom_data["relationships"] = []
    sbom_data["files"] = []

    # Add all the packages from the source SBOM.
    # We want to skip the file information because
    # the files aren't available in Windows artifacts.
    source_sbom_data = json.loads(
        (source_dir / "Misc/sbom.spdx.json").read_text(encoding="utf-8")
    )
    sbom_data["packages"].extend(source_sbom_data["packages"])

    create_cpython_sbom(
        sbom_data,
        cpython_version=cpython_version,
        artifact_path=artifact_path,
    )
    sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")

    # The Windows embed artifacts don't contain pip/ensurepip,
    # but the installer artifacts do. Add pip for '.exe' installers.
    if artifact_name.endswith(".exe"):
        bundled_dir = source_dir / "Lib/ensurepip/_bundled"

        # Find the pip wheel in ensurepip in the source code. The listing is
        # sorted so the selection doesn't depend on directory order.
        for pathname in sorted(os.listdir(bundled_dir)):
            if pathname.startswith("pip-") and pathname.endswith(".whl"):
                pip_wheel_filename = pathname
                pip_wheel_bytes = (bundled_dir / pathname).read_bytes()
                break
        else:
            raise ValueError("Could not find pip wheel in 'Lib/ensurepip/_bundled/...'")

        create_pip_sbom_from_wheel(
            sbom_data,
            pip_wheel_filename=pip_wheel_filename,
            pip_wheel_bytes=pip_wheel_bytes,
        )

    # Final relationship, this SBOM describes the CPython package.
    sbom_data["relationships"].append(
        {
            "spdxElementId": "SPDXRef-DOCUMENT",
            "relatedSpdxElement": sbom_cpython_package_spdx_id,
            "relationshipType": "DESCRIBES",
        }
    )

    # Apply the 'supplier' tag to every package since we're shipping
    # the package in the artifact itself. Originator field is used for maintainers.
    for sbom_package in sbom_data["packages"]:
        sbom_package["supplier"] = "Organization: Python Software Foundation"
        # Source packages have been compiled. '.get()' tolerates packages
        # that don't declare a purpose at all.
        if sbom_package.get("primaryPackagePurpose") == "SOURCE":
            sbom_package["primaryPackagePurpose"] = "LIBRARY"

    return sbom_data
526
607
527
608
528
609
def main() -> None:
    """Write an '<artifact>.spdx.json' SBOM next to each artifact given.

    Command line: one or more artifact paths, plus an optional
    '--cpython-source-dir' that is passed through to the Windows artifact
    handler. Paths ending in '.exe' or '.zip' are treated as Windows
    artifacts; everything else is treated as a source tarball.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpython-source-dir", default=None)
    parser.add_argument("artifacts", nargs="+")
    parsed_args = parser.parse_args(sys.argv[1:])

    cpython_source_dir = parsed_args.cpython_source_dir

    for artifact_path in parsed_args.artifacts:
        # Windows installer and embed artifacts
        if artifact_path.endswith((".exe", ".zip")):
            sbom_data = create_sbom_for_windows_artifact(
                artifact_path,
                cpython_source_dir=cpython_source_dir,
            )
        # Source artifacts
        else:
            sbom_data = create_sbom_for_source_tarball(artifact_path)

        # Normalize SBOM data for reproducibility.
        normalize_sbom_data(sbom_data)

        # Mode "w" already truncates an existing file, so no explicit
        # truncate() is needed. The encoding is pinned so the output doesn't
        # depend on the locale.
        with open(artifact_path + ".spdx.json", mode="w", encoding="utf-8") as f:
            f.write(json.dumps(sbom_data, indent=2, sort_keys=True))
634
+
532
635
533
636
if __name__ == "__main__" :
534
637
main ()
0 commit comments