diff --git a/dlp/requirements.txt b/dlp/requirements.txt
index b973c95c668..9108c412ab4 100644
--- a/dlp/requirements.txt
+++ b/dlp/requirements.txt
@@ -1,3 +1,3 @@
-google-cloud-dlp==0.1.1
+google-cloud-dlp==0.2.0
 google-cloud-storage==1.8.0
 google.cloud.pubsub==0.32.1
diff --git a/dlp/templates.py b/dlp/templates.py
new file mode 100644
index 00000000000..7ebde2cef1b
--- /dev/null
+++ b/dlp/templates.py
@@ -0,0 +1,229 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sample app that sets up Data Loss Prevention API inspect templates."""
+
+from __future__ import print_function
+
+import argparse
+import os
+import time
+
+
+# [START dlp_create_template]
+def create_inspect_template(project, info_types,
+                            template_id=None, display_name=None,
+                            min_likelihood=None, max_findings=None,
+                            include_quote=None):
+    """Creates a Data Loss Prevention API inspect template.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+        info_types: A list of strings representing info types to look for.
+            A full list of info type categories can be fetched from the API.
+        template_id: The id of the template. If omitted, an id will be randomly
+            generated.
+        display_name: The optional display name of the template.
+        min_likelihood: A string representing the minimum likelihood threshold
+            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
+            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
+        max_findings: The maximum number of findings to report; 0 = no maximum.
+        include_quote: Boolean for whether to display a quote of the detected
+            information in the results.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Prepare info_types by converting the list of strings into a list of
+    # dictionaries (protos are also accepted).
+    info_types = [{'name': info_type} for info_type in info_types]
+
+    # Construct the configuration dictionary. Keys which are None may
+    # optionally be omitted entirely.
+    inspect_config = {
+        'info_types': info_types,
+        'min_likelihood': min_likelihood,
+        'include_quote': include_quote,
+        'limits': {'max_findings_per_request': max_findings},
+    }
+
+    inspect_template = {
+        'inspect_config': inspect_config,
+        'display_name': display_name,
+    }
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Call the API.
+    response = dlp.create_inspect_template(
+        parent, inspect_template=inspect_template, template_id=template_id)
+
+    print('Successfully created template {}'.format(response.name))
+
+# [END dlp_create_template]
+
+
+# [START dlp_list_templates]
+def list_inspect_templates(project):
+    """Lists all Data Loss Prevention API inspect templates.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Call the API.
+    response = dlp.list_inspect_templates(parent)
+
+    # Define a helper function to convert the API's "seconds since the epoch"
+    # time format into a human-readable string.
+    def human_readable_time(timestamp):
+        return str(time.localtime(timestamp.seconds))
+
+    for template in response:
+        print('Template {}:'.format(template.name))
+        if template.display_name:
+            print('  Display Name: {}'.format(template.display_name))
+        print('  Created: {}'.format(
+            human_readable_time(template.create_time)))
+        print('  Updated: {}'.format(
+            human_readable_time(template.update_time)))
+
+        config = template.inspect_config
+        print('  InfoTypes: {}'.format(', '.join(
+            [it.name for it in config.info_types]
+        )))
+        print('  Minimum likelihood: {}'.format(config.min_likelihood))
+        print('  Include quotes: {}'.format(config.include_quote))
+        print('  Max findings per request: {}'.format(
+            config.limits.max_findings_per_request))
+
+# [END dlp_list_templates]
+
+
+# [START dlp_delete_template]
+def delete_inspect_template(project, template_id):
+    """Deletes a Data Loss Prevention API template.
+    Args:
+        project: The id of the Google Cloud project which owns the template.
+        template_id: The id of the template to delete.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Combine the template id with the parent id.
+    template_resource = '{}/inspectTemplates/{}'.format(parent, template_id)
+
+    # Call the API.
+    dlp.delete_inspect_template(template_resource)
+
+    print('Template {} successfully deleted.'.format(template_resource))
+
+# [END dlp_delete_template]
+
+
+if __name__ == '__main__':
+    default_project = os.environ.get('GCLOUD_PROJECT')
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    subparsers = parser.add_subparsers(
+        dest='action', help='Select which action to perform.')
+    subparsers.required = True
+
+    parser_create = subparsers.add_parser('create', help='Create a template.')
+    parser_create.add_argument(
+        '--template_id',
+        help='The id of the template. If omitted, an id will be randomly '
+             'generated')
+    parser_create.add_argument(
+        '--display_name',
+        help='The optional display name of the template.')
+    parser_create.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+    parser_create.add_argument(
+        '--info_types', action='append',
+        help='Strings representing info types to look for. A full list of '
+             'info categories and types is available from the API. Examples '
+             'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
+             'If unspecified, the three above examples will be used.',
+        default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
+    parser_create.add_argument(
+        '--min_likelihood',
+        choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY',
+                 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'],
+        help='A string representing the minimum likelihood threshold that '
+             'constitutes a match.')
+    parser_create.add_argument(
+        '--max_findings', type=int,
+        help='The maximum number of findings to report; 0 = no maximum.')
+    parser_create.add_argument(
+        '--include_quote', type=bool,
+        help='A boolean for whether to display a quote of the detected '
+             'information in the results.',
+        default=True)
+
+    parser_list = subparsers.add_parser('list', help='List all templates.')
+    parser_list.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+
+    parser_delete = subparsers.add_parser('delete', help='Delete a template.')
+    parser_delete.add_argument(
+        'template_id',
+        help='The id of the template to delete.')
+    parser_delete.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+
+    args = parser.parse_args()
+
+    if args.action == 'create':
+        create_inspect_template(
+            args.project, args.info_types,
+            template_id=args.template_id, display_name=args.display_name,
+            min_likelihood=args.min_likelihood,
+            max_findings=args.max_findings, include_quote=args.include_quote
+        )
+    elif args.action == 'list':
+        list_inspect_templates(args.project)
+    elif args.action == 'delete':
+        delete_inspect_template(args.project, args.template_id)
diff --git a/dlp/templates_test.py b/dlp/templates_test.py
new file mode 100644
index 00000000000..776096719ef
--- /dev/null
+++ b/dlp/templates_test.py
@@ -0,0 +1,57 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import google.api_core.exceptions
+import google.cloud.storage
+
+import templates
+
+
+GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
+TEST_TEMPLATE_ID = 'test-template'
+
+
+def test_create_list_and_delete_template(capsys):
+    try:
+        templates.create_inspect_template(
+            GCLOUD_PROJECT, ['FIRST_NAME', 'EMAIL_ADDRESS', 'PHONE_NUMBER'],
+            template_id=TEST_TEMPLATE_ID,
+        )
+    except google.api_core.exceptions.InvalidArgument:
+        # Template already exists, perhaps due to a previous interrupted test.
+        templates.delete_inspect_template(GCLOUD_PROJECT, TEST_TEMPLATE_ID)
+
+        out, _ = capsys.readouterr()
+        assert TEST_TEMPLATE_ID in out
+
+        # Try again and move on.
+        templates.create_inspect_template(
+            GCLOUD_PROJECT, ['FIRST_NAME', 'EMAIL_ADDRESS', 'PHONE_NUMBER'],
+            template_id=TEST_TEMPLATE_ID,
+        )
+
+    out, _ = capsys.readouterr()
+    assert TEST_TEMPLATE_ID in out
+
+    templates.list_inspect_templates(GCLOUD_PROJECT)
+
+    out, _ = capsys.readouterr()
+    assert TEST_TEMPLATE_ID in out
+
+    templates.delete_inspect_template(GCLOUD_PROJECT, TEST_TEMPLATE_ID)
+
+    out, _ = capsys.readouterr()
+    assert TEST_TEMPLATE_ID in out
diff --git a/dlp/triggers.py b/dlp/triggers.py
new file mode 100644
index 00000000000..2d89c51491a
--- /dev/null
+++ b/dlp/triggers.py
@@ -0,0 +1,253 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sample app that sets up Data Loss Prevention API automation triggers."""
+
+from __future__ import print_function
+
+import argparse
+import os
+import time
+
+
+# [START dlp_create_trigger]
+def create_trigger(project, bucket, scan_period_days, info_types,
+                   trigger_id=None, display_name=None, description=None,
+                   min_likelihood=None, max_findings=None):
+    """Creates a scheduled Data Loss Prevention API inspect_content trigger.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+        bucket: The name of the GCS bucket to scan. This sample scans all
+            files in the bucket using a wildcard.
+        scan_period_days: How often to repeat the scan, in days.
+            The minimum is 1 day.
+        info_types: A list of strings representing info types to look for.
+            A full list of info type categories can be fetched from the API.
+        trigger_id: The id of the trigger. If omitted, an id will be randomly
+            generated.
+        display_name: The optional display name of the trigger.
+        description: The optional description of the trigger.
+        min_likelihood: A string representing the minimum likelihood threshold
+            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
+            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
+        max_findings: The maximum number of findings to report; 0 = no maximum.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Prepare info_types by converting the list of strings into a list of
+    # dictionaries (protos are also accepted).
+    info_types = [{'name': info_type} for info_type in info_types]
+
+    # Construct the configuration dictionary. Keys which are None may
+    # optionally be omitted entirely.
+    inspect_config = {
+        'info_types': info_types,
+        'min_likelihood': min_likelihood,
+        'limits': {'max_findings_per_request': max_findings},
+    }
+
+    # Construct a cloud_storage_options dictionary with the bucket's URL.
+    url = 'gs://{}/*'.format(bucket)
+    storage_config = {
+        'cloud_storage_options': {
+            'file_set': {'url': url}
+        }
+    }
+
+    # Construct the job definition.
+    job = {
+        'inspect_config': inspect_config,
+        'storage_config': storage_config,
+    }
+
+    # Construct the schedule definition:
+    schedule = {
+        'recurrence_period_duration': {
+            'seconds': scan_period_days * 60 * 60 * 24,
+        }
+    }
+
+    # Construct the trigger definition.
+    job_trigger = {
+        'inspect_job': job,
+        'display_name': display_name,
+        'description': description,
+        'triggers': [
+            {'schedule': schedule}
+        ],
+        'status': 'HEALTHY'
+    }
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Call the API.
+    response = dlp.create_job_trigger(
+        parent, job_trigger=job_trigger, trigger_id=trigger_id)
+
+    print('Successfully created trigger {}'.format(response.name))
+
+# [END dlp_create_trigger]
+
+
+# [START dlp_list_triggers]
+def list_triggers(project):
+    """Lists all Data Loss Prevention API triggers.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Call the API.
+    response = dlp.list_job_triggers(parent)
+
+    # Define a helper function to convert the API's "seconds since the epoch"
+    # time format into a human-readable string.
+    def human_readable_time(timestamp):
+        return str(time.localtime(timestamp.seconds))
+
+    for trigger in response:
+        print('Trigger {}:'.format(trigger.name))
+        print('  Created: {}'.format(human_readable_time(trigger.create_time)))
+        print('  Updated: {}'.format(human_readable_time(trigger.update_time)))
+        if trigger.display_name:
+            print('  Display Name: {}'.format(trigger.display_name))
+        if trigger.description:
+            print('  Description: {}'.format(trigger.description))
+        print('  Status: {}'.format(trigger.status))
+        print('  Error count: {}'.format(len(trigger.errors)))
+
+# [END dlp_list_triggers]
+
+
+# [START dlp_delete_trigger]
+def delete_trigger(project, trigger_id):
+    """Deletes a Data Loss Prevention API trigger.
+    Args:
+        project: The id of the Google Cloud project which owns the trigger.
+        trigger_id: The id of the trigger to delete.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+
+    # Import the client library
+    import google.cloud.dlp
+
+    # Instantiate a client.
+    dlp = google.cloud.dlp.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Combine the trigger id with the parent id.
+    trigger_resource = '{}/jobTriggers/{}'.format(parent, trigger_id)
+
+    # Call the API.
+    dlp.delete_job_trigger(trigger_resource)
+
+    print('Trigger {} successfully deleted.'.format(trigger_resource))
+
+# [END dlp_delete_trigger]
+
+
+if __name__ == '__main__':
+    default_project = os.environ.get('GCLOUD_PROJECT')
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    subparsers = parser.add_subparsers(
+        dest='action', help='Select which action to perform.')
+    subparsers.required = True
+
+    parser_create = subparsers.add_parser('create', help='Create a trigger.')
+    parser_create.add_argument(
+        'bucket', help='The name of the GCS bucket containing the file.')
+    parser_create.add_argument(
+        'scan_period_days', type=int,
+        help='How often to repeat the scan, in days. The minimum is 1 day.')
+    parser_create.add_argument(
+        '--trigger_id',
+        help='The id of the trigger. If omitted, an id will be randomly '
+             'generated')
+    parser_create.add_argument(
+        '--display_name',
+        help='The optional display name of the trigger.')
+    parser_create.add_argument(
+        '--description',
+        help='The optional description of the trigger.')
+    parser_create.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+    parser_create.add_argument(
+        '--info_types', action='append',
+        help='Strings representing info types to look for. A full list of '
+             'info categories and types is available from the API. Examples '
+             'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
+             'If unspecified, the three above examples will be used.',
+        default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
+    parser_create.add_argument(
+        '--min_likelihood',
+        choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY',
+                 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'],
+        help='A string representing the minimum likelihood threshold that '
+             'constitutes a match.')
+    parser_create.add_argument(
+        '--max_findings', type=int,
+        help='The maximum number of findings to report; 0 = no maximum.')
+
+    parser_list = subparsers.add_parser('list', help='List all triggers.')
+    parser_list.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+
+    parser_delete = subparsers.add_parser('delete', help='Delete a trigger.')
+    parser_delete.add_argument(
+        'trigger_id',
+        help='The id of the trigger to delete.')
+    parser_delete.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
+
+    args = parser.parse_args()
+
+    if args.action == 'create':
+        create_trigger(
+            args.project, args.bucket, args.scan_period_days, args.info_types,
+            trigger_id=args.trigger_id, display_name=args.display_name,
+            description=args.description, min_likelihood=args.min_likelihood,
+            max_findings=args.max_findings,
+        )
+    elif args.action == 'list':
+        list_triggers(args.project)
+    elif args.action == 'delete':
+        delete_trigger(args.project, args.trigger_id)
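The trigger samples can likewise be exercised directly. The sketch below is a non-authoritative example (the bucket name and trigger id are placeholders, and GCLOUD_PROJECT is assumed to be set); note that create_trigger converts the scan period into the schedule's recurrence_period_duration in seconds, so a 7-day period becomes 7 * 60 * 60 * 24 = 604800 seconds.

```python
# Illustrative usage of the functions added in dlp/triggers.py.
# 'my-test-bucket' and 'my-weekly-trigger' are placeholders; GCLOUD_PROJECT
# must be set and default credentials available.
import os

import triggers

project = os.environ['GCLOUD_PROJECT']

# Schedule a weekly scan of gs://my-test-bucket/* for names, email
# addresses, and phone numbers (7 days -> 604800-second recurrence).
triggers.create_trigger(
    project, 'my-test-bucket', 7,
    ['FIRST_NAME', 'EMAIL_ADDRESS', 'PHONE_NUMBER'],
    trigger_id='my-weekly-trigger',
    description='Weekly DLP scan of my-test-bucket',
)

# List the project's triggers, then remove the one just created.
triggers.list_triggers(project)
triggers.delete_trigger(project, 'my-weekly-trigger')
```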
diff --git a/dlp/triggers_test.py b/dlp/triggers_test.py
new file mode 100644
index 00000000000..75e587b5a8d
--- /dev/null
+++ b/dlp/triggers_test.py
@@ -0,0 +1,94 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import google.api_core.exceptions
+import google.cloud.storage
+
+import pytest
+
+import triggers
+
+
+GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
+TEST_BUCKET_NAME = GCLOUD_PROJECT + '-dlp-python-client-test'
+RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), 'resources')
+RESOURCE_FILE_NAMES = ['test.txt', 'test.png', 'harmless.txt', 'accounts.txt']
+TEST_TRIGGER_ID = 'test-trigger'
+
+
+@pytest.fixture(scope='module')
+def bucket():
+    # Creates a GCS bucket, uploads files required for the test, and tears down
+    # the entire bucket afterwards.
+
+    client = google.cloud.storage.Client()
+    try:
+        bucket = client.get_bucket(TEST_BUCKET_NAME)
+    except google.cloud.exceptions.NotFound:
+        bucket = client.create_bucket(TEST_BUCKET_NAME)
+
+    # Upload the blobs and keep track of them in a list.
+    blobs = []
+    for name in RESOURCE_FILE_NAMES:
+        path = os.path.join(RESOURCE_DIRECTORY, name)
+        blob = bucket.blob(name)
+        blob.upload_from_filename(path)
+        blobs.append(blob)
+
+    # Yield the object to the test; lines after this execute as a teardown.
+    yield bucket
+
+    # Delete the files.
+    for blob in blobs:
+        blob.delete()
+
+    # Attempt to delete the bucket; this will only work if it is empty.
+    bucket.delete()
+
+
+def test_create_list_and_delete_trigger(bucket, capsys):
+    try:
+        triggers.create_trigger(
+            GCLOUD_PROJECT, bucket.name, 7,
+            ['FIRST_NAME', 'EMAIL_ADDRESS', 'PHONE_NUMBER'],
+            trigger_id=TEST_TRIGGER_ID,
+        )
+    except google.api_core.exceptions.InvalidArgument:
+        # Trigger already exists, perhaps due to a previous interrupted test.
+        triggers.delete_trigger(GCLOUD_PROJECT, TEST_TRIGGER_ID)
+
+        out, _ = capsys.readouterr()
+        assert TEST_TRIGGER_ID in out
+
+        # Try again and move on.
+        triggers.create_trigger(
+            GCLOUD_PROJECT, bucket.name, 7,
+            ['FIRST_NAME', 'EMAIL_ADDRESS', 'PHONE_NUMBER'],
+            trigger_id=TEST_TRIGGER_ID,
+        )
+
+    out, _ = capsys.readouterr()
+    assert TEST_TRIGGER_ID in out
+
+    triggers.list_triggers(GCLOUD_PROJECT)
+
+    out, _ = capsys.readouterr()
+    assert TEST_TRIGGER_ID in out
+
+    triggers.delete_trigger(GCLOUD_PROJECT, TEST_TRIGGER_ID)
+
+    out, _ = capsys.readouterr()
+    assert TEST_TRIGGER_ID in out
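A note on exercising the new tests: both files are plain pytest modules that only need GCLOUD_PROJECT and application-default credentials (the trigger tests additionally create and later delete a temporary GCS bucket named after the project). The snippet below is merely an illustration of a direct invocation; it is not part of the change.

```python
# Illustrative only: run the new DLP sample tests directly with pytest.
# Assumes GCLOUD_PROJECT is exported and default credentials are available.
import pytest

pytest.main(['dlp/templates_test.py', 'dlp/triggers_test.py', '-v'])
```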