Add DLP code samples for custom info types (#1524) · MagicLegends/python-docs-samples@87ce0ff · GitHub
[go: up one dir, main page]

Skip to content

Commit 87ce0ff

Browse files
mwdaub authored and andrewsg committed
Add DLP code samples for custom info types (GoogleCloudPlatform#1524)
* Add custom info type samples to inspect_content.py

  Use flags to indicate dictionary word lists and regex patterns, then parse them into custom info types.

* Make code compatible with python 2.7
* Add missing commas
* Remove bad import
* Add tests for custom info types
* Add info_types parameter to deid.py
* Update deid tests to use info_types parameter
* Fix indentation
* Add blank lines
* Share logic for building custom info types
* Fix line too long
* Fix typo.
* Revert "Fix typo."

  This reverts commit b4ffea6, so that the sharing of the custom info type logic can be reverted as well to make the code samples more readable.

* Revert "Share logic for building custom info types"

  This reverts commit 47fc04f. This makes the code samples more readable.

* Switch from indexes to using enumerate.
* Updated help message for custom dictionaries.
* Fix enumerate syntax error.
1 parent 5e5ea6d commit 87ce0ff

File tree

4 files changed: +268 -12 lines changed

dlp/deid.py

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020

2121

2222
# [START dlp_deidentify_masking]
23-
def deidentify_with_mask(project, string, masking_character=None,
23+
def deidentify_with_mask(project, string, info_types, masking_character=None,
2424
number_to_mask=0):
2525
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
2626
string by masking it with a character.
@@ -44,6 +44,11 @@ def deidentify_with_mask(project, string, masking_character=None,
4444
# Convert the project id into a full resource id.
4545
parent = dlp.project_path(project)
4646

47+
# Construct inspect configuration dictionary
48+
inspect_config = {
49+
'info_types': [{'name': info_type} for info_type in info_types]
50+
}
51+
4752
# Construct deidentify configuration dictionary
4853
deidentify_config = {
4954
'info_type_transformations': {
@@ -65,15 +70,16 @@ def deidentify_with_mask(project, string, masking_character=None,
6570

6671
# Call the API
6772
response = dlp.deidentify_content(
68-
parent, deidentify_config=deidentify_config, item=item)
73+
parent, inspect_config=inspect_config,
74+
deidentify_config=deidentify_config, item=item)
6975

7076
# Print out the results.
7177
print(response.item.value)
7278
# [END dlp_deidentify_masking]
7379

7480

7581
# [START dlp_deidentify_fpe]
76-
def deidentify_with_fpe(project, string, alphabet=None,
82+
def deidentify_with_fpe(project, string, info_types, alphabet=None,
7783
surrogate_type=None, key_name=None, wrapped_key=None):
7884
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
7985
string using Format Preserving Encryption (FPE).
@@ -127,6 +133,11 @@ def deidentify_with_fpe(project, string, alphabet=None,
127133
'name': surrogate_type
128134
}
129135

136+
# Construct inspect configuration dictionary
137+
inspect_config = {
138+
'info_types': [{'name': info_type} for info_type in info_types]
139+
}
140+
130141
# Construct deidentify configuration dictionary
131142
deidentify_config = {
132143
'info_type_transformations': {
@@ -146,7 +157,8 @@ def deidentify_with_fpe(project, string, alphabet=None,
146157

147158
# Call the API
148159
response = dlp.deidentify_content(
149-
parent, deidentify_config=deidentify_config, item=item)
160+
parent, inspect_config=inspect_config,
161+
deidentify_config=deidentify_config, item=item)
150162

151163
# Print results
152164
print(response.item.value)
@@ -404,6 +416,13 @@ def write_data(data):
404416
'deid_mask',
405417
help='Deidentify sensitive data in a string by masking it with a '
406418
'character.')
419+
mask_parser.add_argument(
420+
'--info_types', action='append',
421+
help='Strings representing info types to look for. A full list of '
422+
'info categories and types is available from the API. Examples '
423+
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
424+
'If unspecified, the three above examples will be used.',
425+
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
407426
mask_parser.add_argument(
408427
'project',
409428
help='The Google Cloud project id to use as a parent resource.')
@@ -423,6 +442,13 @@ def write_data(data):
423442
'deid_fpe',
424443
help='Deidentify sensitive data in a string using Format Preserving '
425444
'Encryption (FPE).')
445+
fpe_parser.add_argument(
446+
'--info_types', action='append',
447+
help='Strings representing info types to look for. A full list of '
448+
'info categories and types is available from the API. Examples '
449+
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
450+
'If unspecified, the three above examples will be used.',
451+
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
426452
fpe_parser.add_argument(
427453
'project',
428454
help='The Google Cloud project id to use as a parent resource.')
@@ -532,11 +558,12 @@ def write_data(data):
532558
args = parser.parse_args()
533559

534560
if args.content == 'deid_mask':
535-
deidentify_with_mask(args.project, args.item,
561+
deidentify_with_mask(args.project, args.item, args.info_types,
536562
masking_character=args.masking_character,
537563
number_to_mask=args.number_to_mask)
538564
elif args.content == 'deid_fpe':
539-
deidentify_with_fpe(args.project, args.item, alphabet=args.alphabet,
565+
deidentify_with_fpe(args.project, args.item, args.info_types,
566+
alphabet=args.alphabet,
540567
wrapped_key=args.wrapped_key,
541568
key_name=args.key_name,
542569
surrogate_type=args.surrogate_type)

dlp/deid_test.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ def tempdir():
4343

4444

4545
def test_deidentify_with_mask(capsys):
46-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)
46+
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
47+
['US_SOCIAL_SECURITY_NUMBER'])
4748

4849
out, _ = capsys.readouterr()
4950
assert 'My SSN is *********' in out
@@ -60,14 +61,17 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
6061
deid.deidentify_with_mask(
6162
GCLOUD_PROJECT,
6263
HARMFUL_STRING,
64+
['US_SOCIAL_SECURITY_NUMBER'],
6365
masking_character='#')
6466

6567
out, _ = capsys.readouterr()
6668
assert 'My SSN is #########' in out
6769

6870

6971
def test_deidentify_with_mask_masking_number_specified(capsys):
70-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, number_to_mask=7)
72+
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
73+
['US_SOCIAL_SECURITY_NUMBER'],
74+
number_to_mask=7)
7175

7276
out, _ = capsys.readouterr()
7377
assert 'My SSN is *******27' in out
@@ -77,6 +81,7 @@ def test_deidentify_with_fpe(capsys):
7781
deid.deidentify_with_fpe(
7882
GCLOUD_PROJECT,
7983
HARMFUL_STRING,
84+
['US_SOCIAL_SECURITY_NUMBER'],
8085
alphabet='NUMERIC',
8186
wrapped_key=WRAPPED_KEY,
8287
key_name=KEY_NAME)
@@ -90,6 +95,7 @@ def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
9095
deid.deidentify_with_fpe(
9196
GCLOUD_PROJECT,
9297
HARMFUL_STRING,
98+
['US_SOCIAL_SECURITY_NUMBER'],
9399
alphabet='NUMERIC',
94100
wrapped_key=WRAPPED_KEY,
95101
key_name=KEY_NAME,

0 commit comments

Comments
 (0)
0