diff --git a/scanners/git-repo-scanner/README.md b/scanners/git-repo-scanner/README.md index 89702467e1..10258cc286 100644 --- a/scanners/git-repo-scanner/README.md +++ b/scanners/git-repo-scanner/README.md @@ -31,22 +31,32 @@ or ``` #### GitHub -For type github you can use the following options: -- `--organization`: The name of the github organization you want to scan. -- `--url`: The url of the api for a github enterprise server. Skip this option for repos on . -- `--access-token`: Your personal github access token. -- `--ignore-repos`: A list of github repository ids you want to ignore +For type GitHub you can use the following options: +- `--organization`: The name of the GitHub organization you want to scan. +- `--url`: The url of the api for a GitHub enterprise server. Skip this option for repos on https://github.com. +- `--access-token`: Your personal GitHub access token. +- `--ignore-repos`: A list of GitHub repository ids you want to ignore +- `--obey-rate-limit`: True to obey the rate limit of the GitHub server (default), otherwise False +- `--activity-since-duration`: Return git repo findings with repo activity (e.g. commits) more recent than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each + with optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. +- `--activity-until-duration`: Return git repo findings with repo activity (e.g. commits) older than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each with + optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. For now only organizations are supported so the option is mandatory. We **strongly recommend** providing an access token for authentication. If not provided the rate limiting will kick in after about 30 repositories scanned. 
#### GitLab -For type gitlab you can use the following options: -- `--url`: The url of the gitlab server. -- `--access-token`: Your personal gitlab access token. -- `--group`: A specific gitlab group id you want to san, including subgroups. -- `--ignore-groups`: A list of gitlab group ids you want to ignore -- `--ignore-repos`: A list of gitlab project ids you want to ignore +For type GitLab you can use the following options: +- `--url`: The url of the GitLab server. +- `--access-token`: Your personal GitLab access token. +- `--group`: A specific GitLab group id you want to scan, including subgroups. +- `--ignore-groups`: A list of GitLab group ids you want to ignore +- `--ignore-repos`: A list of GitLab project ids you want to ignore +- `--obey-rate-limit`: True to obey the rate limit of the GitLab server (default), otherwise False +- `--activity-since-duration`: Return git repo findings with repo activity (e.g. commits) more recent than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each + with optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. +- `--activity-until-duration`: Return git repo findings with repo activity (e.g. commits) older than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each with + optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. For gitlab the url and the access token is mandatory. If you don't provide a specific group id all projects on the gitlab server are going to be discovered. 
diff --git a/scanners/git-repo-scanner/README.md.gotmpl b/scanners/git-repo-scanner/README.md.gotmpl index b573606f14..02ab3188cd 100644 --- a/scanners/git-repo-scanner/README.md.gotmpl +++ b/scanners/git-repo-scanner/README.md.gotmpl @@ -32,22 +32,33 @@ or ``` #### GitHub -For type github you can use the following options: -- `--organization`: The name of the github organization you want to scan. -- `--url`: The url of the api for a github enterprise server. Skip this option for repos on . -- `--access-token`: Your personal github access token. -- `--ignore-repos`: A list of github repository ids you want to ignore +For type GitHub you can use the following options: +- `--organization`: The name of the GitHub organization you want to scan. +- `--url`: The url of the api for a GitHub enterprise server. Skip this option for repos on https://github.com. +- `--access-token`: Your personal GitHub access token. +- `--ignore-repos`: A list of GitHub repository ids you want to ignore +- `--obey-rate-limit`: True to obey the rate limit of the GitHub server (default), otherwise False +- `--activity-since-duration`: Return git repo findings with repo activity (e.g. commits) more recent than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each + with optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. +- `--activity-until-duration`: Return git repo findings with repo activity (e.g. commits) older than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each with + optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. For now only organizations are supported so the option is mandatory. We **strongly recommend** providing an access token for authentication. If not provided the rate limiting will kick in after about 30 repositories scanned. 
#### GitLab -For type gitlab you can use the following options: -- `--url`: The url of the gitlab server. -- `--access-token`: Your personal gitlab access token. -- `--group`: A specific gitlab group id you want to san, including subgroups. -- `--ignore-groups`: A list of gitlab group ids you want to ignore -- `--ignore-repos`: A list of gitlab project ids you want to ignore +For type GitLab you can use the following options: +- `--url`: The url of the GitLab server. +- `--access-token`: Your personal GitLab access token. +- `--group`: A specific GitLab group id you want to scan, including subgroups. +- `--ignore-groups`: A list of GitLab group ids you want to ignore +- `--ignore-repos`: A list of GitLab project ids you want to ignore +- `--obey-rate-limit`: True to obey the rate limit of the GitLab server (default), otherwise False +- `--activity-since-duration`: Return git repo findings with repo activity (e.g. commits) more recent than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each + with optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. +- `--activity-until-duration`: Return git repo findings with repo activity (e.g. commits) older than a specific date expressed by a duration (now + duration). A duration string is a possibly signed sequence of decimal numbers, each with + optional fraction and a unit suffix, such as '1h' or '2h45m'. Valid time units are 'm', 'h', 'd', 'w'. + For gitlab the url and the access token is mandatory. If you don't provide a specific group id all projects on the gitlab server are going to be discovered. 
diff --git a/scanners/git-repo-scanner/scanner/.dockerignore b/scanners/git-repo-scanner/scanner/.dockerignore new file mode 100644 index 0000000000..f88f2f169d --- /dev/null +++ b/scanners/git-repo-scanner/scanner/.dockerignore @@ -0,0 +1,3 @@ +__pytest_cache +.pytest_cache +*_test.py diff --git a/scanners/git-repo-scanner/scanner/Dockerfile b/scanners/git-repo-scanner/scanner/Dockerfile index 48d3de19b1..50ccc700a1 100644 --- a/scanners/git-repo-scanner/scanner/Dockerfile +++ b/scanners/git-repo-scanner/scanner/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.9.0-alpine -COPY git_repo_scanner.py /scripts/git_repo_scanner.py -RUN pip install PyGithub python-gitlab +COPY . /scripts/ +RUN pip install -r /scripts/requirements.txt CMD ["/bin/sh"] ENTRYPOINT ["python","/scripts/git_repo_scanner.py"] diff --git a/scanners/git-repo-scanner/scanner/git_repo_scanner.py b/scanners/git-repo-scanner/scanner/git_repo_scanner.py index 7f111bfdf6..dbd692994c 100644 --- a/scanners/git-repo-scanner/scanner/git_repo_scanner.py +++ b/scanners/git-repo-scanner/scanner/git_repo_scanner.py @@ -2,9 +2,19 @@ import logging import sys import json +import calendar +import time +from datetime import datetime +import pytz + from typing import List from pathlib import Path +# https://pypi.org/project/pytimeparse/ +from pytimeparse.timeparse import timeparse +# https://docs.python.org/3/library/datetime.html +from datetime import timedelta + import gitlab from gitlab.v4.objects import Project @@ -18,254 +28,399 @@ def main(): - args = get_parser_args() + args = get_parser_args() - findings = process(args) + findings = process(args) - logger.info(' Write findings to file...') - write_findings_to_file(args, findings) - logger.info(' Finished!') + logger.info(' Write findings to file...') + write_findings_to_file(args, findings) + logger.info(' Finished!') def process(args): - if args.git_type == 'gitlab': - return process_gitlab(args) - else: - return process_github(args) + if args.git_type == 'gitlab': 
+ return process_gitlab(args) + else: + return process_github(args) def process_github(args): - try: - return parse_github(args) - except github.GithubException as e: - logger.info(f' Github API Exception: {e.status} -> {e.data["message"]}') - sys.exit(-1) + try: + return parse_github(args) + except github.GithubException as e: + logger.info(f' Github API Exception: {e.status} -> {e.data["message"]}') + sys.exit(-1) def process_gitlab(args): - try: - return parse_gitlab(args) - except gitlab.GitlabError as e: - logger.info(f' Gitlab API Exception: {e}') - sys.exit(-1) + try: + return parse_gitlab(args) + except gitlab.GitlabError as e: + logger.info(f' Gitlab API Exception: {e}') + sys.exit(-1) def write_findings_to_file(args, findings): - Path(args.file_output).mkdir(parents=True, exist_ok=True) - with open(f'{args.file_output}/git-repo-scanner-findings.json', 'w') as out: - json.dump(findings, out) + Path(args.file_output).mkdir(parents=True, exist_ok=True) + with open(f'{args.file_output}/git-repo-scanner-findings.json', 'w') as out: + json.dump(findings, out) def get_parser_args(args=None): - parser = argparse.ArgumentParser(description='Scan public or private git repositories of organizations or groups') - parser.add_argument('--git-type', - help='Repository type can be github or gitlab', - choices=['github', 'gitlab'], - required=True) - parser.add_argument('--file-output', - help='The path of the output file', - required=True), - parser.add_argument('--url', help='The gitlab url or a github enterprise api url.', - required=False) - parser.add_argument('--access-token', - help='An access token for authentication', - required=False) - parser.add_argument('--organization', - help='The name of the githup organization to scan', - required=False) - parser.add_argument('--group', - help='The id of the gitlab group to scan', - required=False) - parser.add_argument('--ignore-repos', - help='A list of repo ids to ignore', - action='extend', - nargs='+', - type=int, - 
default=[], - required=False) - parser.add_argument('--ignore-groups', - help='A list of gitlab group ids to ignore', - action='extend', - nargs='+', - type=int, - default=[], - required=False) - if args: - return parser.parse_args(args) - else: - return parser.parse_args() + parser = argparse.ArgumentParser(description='Scan public or private git repositories of organizations or groups') + parser.add_argument('--git-type', + help='Repository type can be github or GitLab', + choices=['github', 'gitlab'], + required=True) + parser.add_argument('--file-output', + help='The path of the output file', + required=True), + parser.add_argument('--url', help='The GitLab url or a GitHub enterprise api url.', + required=False) + parser.add_argument('--access-token', + help='An access token for authentication', + required=False) + parser.add_argument('--organization', + help='The name of the GitHub organization to scan', + required=False) + parser.add_argument('--group', + help='The id of the GitLab group to scan', + required=False) + parser.add_argument('--ignore-repos', + help='A list of repo ids to ignore', + action='extend', + nargs='+', + type=int, + default=[], + required=False) + parser.add_argument('--ignore-groups', + help='A list of GitLab group ids to ignore', + action='extend', + nargs='+', + type=int, + default=[], + required=False) + parser.add_argument('--obey-rate-limit', + help='True to obey the rate limit of the GitLab or GitHub server (default), otherwise False', + type=bool, + default=True, + required=False) + parser.add_argument('--activity-since-duration', + help='Return git repo findings with repo activity (e.g. commits) more recent than a specific date expresed by an duration (now + duration)', + type=str, + required=False) + parser.add_argument('--activity-until-duration', + help='Return git repo findings with repo activity (e.g. 
commits) older than a specific date expresed by an duration (now + duration)', + type=str, + required=False) + + if args: + return parser.parse_args(args) + else: + return parser.parse_args() def parse_gitlab(args): - gl: gitlab.Gitlab - if not args.url: - logger.info(' URL required for gitlab connection.') - sys.exit(-1) - logger.info(' Gitlab authentication...') - - gl = gitlab_authenticate(args) + gl: gitlab.Gitlab + if not args.url: + logger.info(' URL required for GitLab connection.') + sys.exit(-1) + + logger.info(' Gitlab authentication...') + gl = gitlab_authenticate(args) + + logger.info(' Gitlab retrieve all repositories...') + now_utc = pytz.utc.localize(datetime.utcnow()) + # Respect time filtering based on "pushed_at" (not "updated_at") + # The difference is that "pushed_at" represents the date and time of the last commit, whereas the "updated_at" represents the date and time of the last change the the repository. + # A change to the repository might be a commit, but it may also be other things, such as changing the description of the repo, creating wiki pages, etc. + # In other words, commits are a subset of updates, and the pushed_at timestamp will therefore either be the same as the updated_at timestamp, or it will be an earlier timestamp. 
+ duration = 0 + activityDeltaDatetime = now_utc + if args.activity_since_duration: + activityDuration = timeparse(args.activity_since_duration) + activityDeltaDatetime = timedelta(seconds=activityDuration) + logger.info(' Get all GitLab Repos (filtered by last activity since ' + str(activityDeltaDatetime) + ' ago.)') + + projects: List[Project] = get_gitlab_projects_active_since(args, gl) + elif args.activity_until_duration: + activityDuration = timeparse(args.activity_until_duration) + activityDeltaDatetime = timedelta(seconds=activityDuration) + logger.info(' Get all GitLab Repos (filtered by last activity until ' + str(activityDeltaDatetime) + ' ago.)') + + projects: List[Project] = get_gitlab_projects_active_until(args, gl) + else: + logger.info(' Get all Gitlab Repos (not filtered)') + projects: List[Project] = get_gitlab_projects_all(args, gl) + + logger.info(' Process Projects...') + activityDate = now_utc - activityDeltaDatetime + findings = process_gitlab_projects(args, projects, activityDeltaDatetime, activityDate) - projects: List[Project] = get_gitlab_projects(args, gl) + return findings - logger.info(' Process Projects...') - findings = process_gitlab_projects(args, projects) +def process_gitlab_projects(args, projects, activityDeltaDatetime, activityDate): + findings = [] + i = 1 + for project in projects: + if is_not_on_ignore_list_gitlab(project, args.ignore_groups, args.ignore_repos): + lastUpDatetime = datetime.fromisoformat(project.last_activity_at) + logger.info(f' {i} - Name: {project.name} - LastUpdate: {lastUpDatetime}') + i += 1 + + # respect time filtering + if args.activity_since_duration: + if lastUpDatetime > activityDate: + findings.append(create_finding_gitlab(project)) + else: + logger.info( + f' Reached activity limit! 
Ignoring all repos with latest activity since `{activityDeltaDatetime}` ago ({str(activityDate)}).') + break + elif args.activity_until_duration: + if lastUpDatetime < activityDate: + findings.append(create_finding_gitlab(project)) + else: + logger.info( + f' Reached activity limit! Ignoring all repos with latest activity until `{activityDeltaDatetime}` ago ({str(activityDate)}).') + break + else: + findings.append(create_finding_gitlab(project)) - return findings + return findings -def process_gitlab_projects(args, projects): - findings = [] - i = 1 - for project in projects: - if is_not_on_ignore_list_gitlab(project, args.ignore_groups, args.ignore_repos): - logger.info(f' {i} - {project.name}') - i += 1 - findings.append(create_finding_gitlab(project)) - return findings +def get_gitlab_projects_all(args, gl): + if args.group: + try: + projects = gl.groups.get(args.group).projects.list(all=True, include_subgroups=True, + obey_rate_limit=args.obey_rate_limit) + except gitlab.exceptions.GitlabGetError: + logger.info(' Group does not exist.') + sys.exit(-1) + else: + projects = gl.projects.list(all=True, max_retries=12, obey_rate_limit=args.obey_rate_limit) + return projects + + +def get_gitlab_projects_active_since(args, gl): + if args.group: + try: + projects = gl.groups.get(args.group).projects.list(all=True, include_subgroups=True, + order_by='last_activity_at', + sort='desc', obey_rate_limit=args.obey_rate_limit) + except gitlab.exceptions.GitlabGetError: + logger.info(' Group does not exist.') + sys.exit(-1) + else: + projects = gl.projects.list(all=True, max_retries=12, order_by='last_activity_at', sort='desc', + obey_rate_limit=args.obey_rate_limit) + return projects + + +def get_gitlab_projects_active_until(args, gl): + if args.group: + try: + projects = gl.groups.get(args.group).projects.list(all=True, include_subgroups=True, + order_by='last_activity_at', + sort='asc', obey_rate_limit=args.obey_rate_limit) + except gitlab.exceptions.GitlabGetError: + 
logger.info(' Group does not exist.') + sys.exit(-1) + else: + projects = gl.projects.list(all=True, max_retries=12, order_by='last_activity_at', sort='asc', + obey_rate_limit=args.obey_rate_limit) + return projects -def get_gitlab_projects(args, gl): - if args.group: - try: - projects = gl.groups.get(args.group).projects.list(all=True, include_subgroups=True) - except gitlab.exceptions.GitlabGetError: - logger.info(' Group does not exist.') - sys.exit(-1) - else: - projects = gl.projects.list(all=True, max_retries=12) - return projects +def gitlab_authenticate(args): + gl: gitlab.Gitlab + if args.access_token: + try: + gl = gitlab.Gitlab(args.url, args.access_token) + gl.auth() + except gitlab.exceptions.GitlabAuthenticationError: + gl = gitlab_authenticate_oauth(args) + else: + logger.info(' Access token required for GitLab authentication.') + sys.exit(-1) + logger.info(' Success') + return gl -def gitlab_authenticate(args): - gl: gitlab.Gitlab - if args.access_token: +def gitlab_authenticate_oauth(args): try: - gl = gitlab.Gitlab(args.url, args.access_token) - gl.auth() + gl = gitlab.Gitlab(args.url, oauth_token=args.access_token) + gl.auth() except gitlab.exceptions.GitlabAuthenticationError: - gl = gitlab_authenticate_oauth(args) - else: - logger.info(' Access token required for gitlab authentication.') - sys.exit(-1) - logger.info(' Success') - return gl + logger.info(' No permission. Check your access token.') + sys.exit(-1) + return gl -def gitlab_authenticate_oauth(args): - try: - gl = gitlab.Gitlab(args.url, oauth_token=args.access_token) - gl.auth() - except gitlab.exceptions.GitlabAuthenticationError: - logger.info(' No permission. 
Check your access token.') - sys.exit(-1) - return gl +def parse_github(args): + gh: github.Github = setup_github(args) + logger.info(' Process Repositories...') -def parse_github(args): - gh: github.Github = setup_github(args) + if args.organization: + findings = process_github_repos(args, gh) + return findings + else: + logger.info(' No organization provided') + sys.exit(-1) - logger.info(' Process Repositories...') - if args.organization: - findings = process_github_repos(args, gh) - return findings - else: - logger.info(' No organization provided') - sys.exit(-1) +def respect_github_ratelimit(args, gh): + if args.obey_rate_limit: + api_limit = gh.get_rate_limit().core + reset_timestamp = calendar.timegm(api_limit.reset.timetuple()) + seconds_until_reset = reset_timestamp - calendar.timegm( + time.gmtime()) + 5 # add 5 seconds to be sure the rate limit has been reset + sleep_time = seconds_until_reset / api_limit.remaining + + logger.info(' Checking Rate-Limit (' + str(args.obey_rate_limit) + ') [remainingApiCalls: ' + str( + api_limit.remaining) + ', seconds_until_reset: ' + str(seconds_until_reset) + ', sleepTime: ' + str( + sleep_time) + ']') + time.sleep(sleep_time) def process_github_repos(args, gh): - findings = [] - org: Organization = gh.get_organization(args.organization) - repos: PaginatedList[Repository] = org.get_repos(type='all') - for i in range(repos.totalCount): - process_github_repos_page(args, findings, repos.get_page(i)) - return findings + findings = [] + org: Organization = gh.get_organization(args.organization) + + # Respect time filtering based on "pushed_at" (not "updated_at") + # The difference is that "pushed_at" represents the date and time of the last commit, whereas the "updated_at" represents the date and time of the last change the the repository. + # A change to the repository might be a commit, but it may also be other things, such as changing the description of the repo, creating wiki pages, etc. 
+ # In other words, commits are a subset of updates, and the pushed_at timestamp will therefore either be the same as the updated_at timestamp, or it will be an earlier timestamp. + duration = 0 + activityDeltaDatetime = datetime.now() + if args.activity_since_duration: + activityDuration = timeparse(args.activity_since_duration) + activityDeltaDatetime = timedelta(seconds=activityDuration) + logger.info(' Get all GitHub Repos (filtered by last activity since ' + str(activityDeltaDatetime) + ' ago.)') + + repos: PaginatedList[Repository] = org.get_repos(type='all', sort='pushed', direction='desc') + elif args.activity_until_duration: + activityDuration = timeparse(args.activity_until_duration) + activityDeltaDatetime = timedelta(seconds=activityDuration) + logger.info(' Get all GitHub Repos (filtered by last activity until ' + str(activityDeltaDatetime) + ' ago.)') + + repos: PaginatedList[Repository] = org.get_repos(type='all', sort='pushed', direction='asc') + else: + logger.info(' Get all GitHub Repos (not filtered)') + repos: PaginatedList[Repository] = org.get_repos(type='all') + + activityDate = datetime.now() - activityDeltaDatetime + + for i in range(repos.totalCount): + process_github_repos_page(args, findings, repos.get_page(i), gh, activityDeltaDatetime, activityDate) + return findings -def process_github_repos_page(args, findings, repos): - repo: Repository - for repo in repos: - if repo.id not in args.ignore_repos: - logger.info(f' {len(findings) + 1} - {repo.name}') - findings.append(create_finding_github(repo)) +def process_github_repos_page(args, findings, repos, gh, activityDeltaDatetime, activityDate): + repo: Repository + for repo in repos: + if repo.id not in args.ignore_repos: + logger.info( + f' {len(findings) + 1} - Name: {repo.name} - LastUpdate: {repo.updated_at} - LastPush: {repo.pushed_at}') + + # respect time filtering + if args.activity_since_duration: + if repo.updated_at > activityDate: + findings.append(create_finding_github(repo)) + 
respect_github_ratelimit(args, gh) + else: + logger.info( + f' Reached activity limit! Ignoring all repos with latest activity since `{activityDeltaDatetime}` ago ({str(activityDate)}).') + break + elif args.activity_until_duration: + if repo.updated_at < activityDate: + findings.append(create_finding_github(repo)) + respect_github_ratelimit(args, gh) + else: + logger.info( + f' Reached activity limit! Ignoring all repos with latest activity until `{activityDeltaDatetime}` ago ({str(activityDate)}).') + break + else: + findings.append(create_finding_github(repo)) + respect_github_ratelimit(args, gh) def setup_github(args): - if args.url: - return setup_github_with_url(args) - else: - return setup_github_without_url(args) + if args.url: + return setup_github_with_url(args) + else: + return setup_github_without_url(args) def setup_github_without_url(args): - if args.access_token: - return github.Github(args.access_token) - else: - return github.Github() + if args.access_token: + return github.Github(args.access_token) + else: + return github.Github() def setup_github_with_url(args): - if args.access_token: - return github.Github(base_url=args.url, login_or_token=args.access_token) - else: - logger.info(' Access token required for github enterprise authentication.') - sys.exit(-1) + if args.access_token: + return github.Github(base_url=args.url, login_or_token=args.access_token) + else: + logger.info(' Access token required for github enterprise authentication.') + sys.exit(-1) def is_not_on_ignore_list_gitlab(project: Project, groups: List, repos: List): - id_project = project.id - kind = project.namespace['kind'] - id_namespace = project.namespace['id'] - if id_project in repos: - return False - if kind == 'group' and id_namespace in groups: - return False - return True + id_project = project.id + kind = project.namespace['kind'] + id_namespace = project.namespace['id'] + if id_project in repos: + return False + if kind == 'group' and id_namespace in groups: + 
return False + return True def create_finding_gitlab(project: Project): - return { - 'name': 'GitLab Repo', - 'description': 'A GitLab repository', - 'category': 'Git Repository', - 'osi_layer': 'APPLICATION', - 'severity': 'INFORMATIONAL', - 'attributes': { - 'id': project.id, - 'web_url': project.web_url, - 'full_name': project.path_with_namespace, - 'owner_type': project.namespace['kind'], - 'owner_id': project.namespace['id'], - 'owner_name': project.namespace['name'], - 'created_at': project.created_at, - 'last_activity_at': project.last_activity_at, - 'visibility': project.visibility + return { + 'name': 'GitLab Repo', + 'description': 'A GitLab repository', + 'category': 'Git Repository', + 'osi_layer': 'APPLICATION', + 'severity': 'INFORMATIONAL', + 'attributes': { + 'id': project.id, + 'web_url': project.web_url, + 'full_name': project.path_with_namespace, + 'owner_type': project.namespace['kind'], + 'owner_id': project.namespace['id'], + 'owner_name': project.namespace['name'], + 'created_at': project.created_at, + 'last_activity_at': project.last_activity_at, + 'visibility': project.visibility + } } - } def create_finding_github(repo: Repository): - return { - 'name': 'GitHub Repo', - 'description': 'A GitHub repository', - 'category': 'Git Repository', - 'osi_layer': 'APPLICATION', - 'severity': 'INFORMATIONAL', - 'attributes': { - 'id': repo.id, - 'web_url': repo.html_url, - 'full_name': repo.full_name, - 'owner_type': repo.owner.type, - 'owner_id': repo.owner.id, - 'owner_name': repo.owner.name, - 'created_at': repo.created_at.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'last_activity_at': repo.updated_at.strftime("%Y-%m-%dT%H:%M:%SZ"), - 'visibility': 'private' if repo.private else 'public' + return { + 'name': 'GitHub Repo', + 'description': 'A GitHub repository', + 'category': 'Git Repository', + 'osi_layer': 'APPLICATION', + 'severity': 'INFORMATIONAL', + 'attributes': { + 'id': repo.id, + 'web_url': repo.html_url, + 'full_name': repo.full_name, + 
'owner_type': repo.owner.type, + 'owner_id': repo.owner.id, + 'owner_name': repo.owner.name, + 'created_at': repo.created_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'last_activity_at': repo.updated_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + 'visibility': 'private' if repo.private else 'public' + } } - } if __name__ == '__main__': - main() + main() diff --git a/scanners/git-repo-scanner/scanner/git_repo_scanner_test.py b/scanners/git-repo-scanner/scanner/git_repo_scanner_test.py index 5724657a06..402fb5fb09 100644 --- a/scanners/git-repo-scanner/scanner/git_repo_scanner_test.py +++ b/scanners/git-repo-scanner/scanner/git_repo_scanner_test.py @@ -1,4 +1,5 @@ import datetime +from datetime import timezone import unittest import git_repo_scanner from munch import Munch @@ -13,7 +14,7 @@ def test_process_gitlab_projects_with_no_ignore_list(self): projects = assemble_projects() args = get_args() # when - findings = git_repo_scanner.process_gitlab_projects(args, projects) + findings = git_repo_scanner.process_gitlab_projects(args, projects, 0, datetime.datetime.now()) # then self.assertEqual(3, len(findings), msg='There should be exactly 3 findings') self.assertEqual(findings[0]['name'], 'GitLab Repo', msg='Test finding output') @@ -25,7 +26,7 @@ def test_process_gitlab_projects_with_ignore_group(self): projects = assemble_projects() args = get_args(ignore_groups=33) # when - findings = git_repo_scanner.process_gitlab_projects(args, projects) + findings = git_repo_scanner.process_gitlab_projects(args, projects, 0, datetime.datetime.now()) # then self.assertEqual(2, len(findings), msg='There should be exactly 2 findings') self.assertEqual(findings[0]['attributes']['web_url'], 'url1', msg='Test finding output') @@ -36,7 +37,7 @@ def test_process_gitlab_projects_with_ignore_project(self): projects = assemble_projects() args = get_args(ignore_projects=1) # when - findings = git_repo_scanner.process_gitlab_projects(args, projects) + findings = 
git_repo_scanner.process_gitlab_projects(args, projects, 0, datetime.datetime.now()) # then self.assertEqual(2, len(findings), msg='There should be exactly 2 findings') self.assertEqual(findings[0]['attributes']['web_url'], 'url2', msg='Test finding output') @@ -94,6 +95,7 @@ def test_parse_github_with_no_org_should_exit(self): def get_args(ignore_groups=0, ignore_projects=0, url=None, access_token=None, org=None): args = ['--git-type', 'gitlab', '--file-output', 'out', + '--obey-rate-limit', False, '--ignore-repos', str(ignore_projects), '--ignore-groups', str(ignore_groups)] if url: @@ -117,14 +119,16 @@ def create_mocks(github_mock, org_mock, pag_mock, repos): def assemble_projects(): - project1 = assemble_project(p_id=1, name='name1', url='url1', path='path1', date_created='10.10.2020', - date_updated='10.11.2020', visibility='private', o_id=11, o_kind='group', + created = datetime.datetime(2020, 10, 10, tzinfo=timezone.utc).isoformat() + updated = datetime.datetime(2020, 11, 10, tzinfo=timezone.utc).isoformat() + project1 = assemble_project(p_id=1, name='name1', url='url1', path='path1', date_created=created, + date_updated=updated, visibility='private', o_id=11, o_kind='group', o_name='name11') - project2 = assemble_project(p_id=2, name='name2', url='url2', path='path2', date_created='10.10.2020', - date_updated='10.11.2020', visibility='private', o_id=22, o_kind='user', + project2 = assemble_project(p_id=2, name='name2', url='url2', path='path2', date_created=created, + date_updated=updated, visibility='private', o_id=22, o_kind='user', o_name='name22') - project3 = assemble_project(p_id=3, name='name3', url='url3', path='path3', date_created='10.10.2020', - date_updated='10.11.2020', visibility='private', o_id=33, o_kind='group', + project3 = assemble_project(p_id=3, name='name3', url='url3', path='path3', date_created=created, + date_updated=updated, visibility='private', o_id=33, o_kind='group', o_name='name33') return [project1, project2, project3] @@ 
-147,20 +151,20 @@ def assemble_project(p_id, name, url, path, date_created, date_updated, visibili def assemble_repos(): - date = datetime.datetime(2020, 5, 17) + date = datetime.datetime(2020, 5, 17, tzinfo=timezone.utc) project1 = assemble_repository(p_id=1, name='name1', url='url1', path='path1', date_created=date, - date_updated=date, visibility=True, o_id=11, o_kind='organization', + date_updated=date, date_pushed=date, visibility=True, o_id=11, o_kind='organization', o_name='name11') project2 = assemble_repository(p_id=2, name='name2', url='url2', path='path2', date_created=date, - date_updated=date, visibility=False, o_id=22, o_kind='organization', + date_updated=date, date_pushed=date, visibility=False, o_id=22, o_kind='organization', o_name='name22') project3 = assemble_repository(p_id=3, name='name3', url='url3', path='path3', date_created=date, - date_updated=date, visibility=False, o_id=33, o_kind='organization', + date_updated=date, date_pushed=date, visibility=False, o_id=33, o_kind='organization', o_name='name33') return [project1, project2, project3] -def assemble_repository(p_id, name, url, path, date_created: datetime, date_updated: datetime, visibility: bool, o_id, +def assemble_repository(p_id, name, url, path, date_created: datetime, date_updated: datetime, date_pushed: datetime, visibility: bool, o_id, o_kind, o_name): repo = Munch() repo.id = p_id @@ -168,6 +172,7 @@ def assemble_repository(p_id, name, url, path, date_created: datetime, date_upda repo.html_url = url repo.full_name = path repo.created_at = date_created + repo.pushed_at = date_pushed repo.updated_at = date_updated repo.private = visibility repo.owner = Munch(type=o_kind, id=o_id, name=o_name) diff --git a/scanners/git-repo-scanner/scanner/requirements.txt b/scanners/git-repo-scanner/scanner/requirements.txt index 3140bb4c74..110b788b82 100644 --- a/scanners/git-repo-scanner/scanner/requirements.txt +++ b/scanners/git-repo-scanner/scanner/requirements.txt @@ -1,4 +1,6 @@ 
-PyGithub == 1.53 -python-gitlab == 2.5.0 +PyGithub == 1.54.1 +python-gitlab == 2.6.0 munch == 2.5.0 mock == 4.0.2 +pytimeparse == 1.1.8 +pytz == 2021.1