From 6bdeaf6b30b747341ef5cde112c10b14b20478e5 Mon Sep 17 00:00:00 2001 From: Jisha Abubaker Date: Fri, 9 Mar 2018 11:09:57 -0800 Subject: [PATCH 01/23] DLP => v2 (WIP) Pending tasks: -> Update / Add Tests -> Region tag / comment review -> Submit for code review + fixes -> Merge once google-cloud-java PR : https://github.com/GoogleCloudPlatform/google-cloud-java/pull/2958 is released --- dlp/pom.xml | 55 +- .../com/example/dlp/DeIdentification.java | 414 ++++++++++-- .../main/java/com/example/dlp/Inspect.java | 608 +++++++++++------- dlp/src/main/java/com/example/dlp/Jobs.java | 145 +++++ .../main/java/com/example/dlp/Metadata.java | 33 +- .../main/java/com/example/dlp/QuickStart.java | 57 +- dlp/src/main/java/com/example/dlp/Redact.java | 180 ++---- .../java/com/example/dlp/RiskAnalysis.java | 551 +++++++++------- .../main/java/com/example/dlp/Templates.java | 254 ++++++++ .../main/java/com/example/dlp/Triggers.java | 282 ++++++++ .../com/example/dlp/DeIdentificationIT.java | 27 +- .../test/java/com/example/dlp/InspectIT.java | 12 +- .../test/java/com/example/dlp/MetadataIT.java | 4 +- .../java/com/example/dlp/QuickStartIT.java | 5 +- .../test/java/com/example/dlp/RedactIT.java | 5 +- .../java/com/example/dlp/RiskAnalysisIT.java | 79 ++- 16 files changed, 1960 insertions(+), 751 deletions(-) create mode 100644 dlp/src/main/java/com/example/dlp/Jobs.java create mode 100644 dlp/src/main/java/com/example/dlp/Templates.java create mode 100644 dlp/src/main/java/com/example/dlp/Triggers.java diff --git a/dlp/pom.xml b/dlp/pom.xml index 7f89ff03d6c..d749d86159e 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -40,13 +40,18 @@ - + com.google.cloud google-cloud-dlp - 0.37.0-beta + 0.35.1-beta-SNAPSHOT + + + + com.google.cloud + google-cloud-pubsub + 0.35.1-beta-SNAPSHOT - commons-cli commons-cli @@ -60,27 +65,27 @@ - - - - maven-assembly-plugin - 3.0.0 - - - jar-with-dependencies - - - - - make-assembly - package - - single - - - - - - + + + + maven-assembly-plugin + 3.0.0 + + 
+ jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 0e98bbf2041..cbd0c2c5f1e 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -16,21 +16,44 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.common.io.BaseEncoding; -import com.google.privacy.dlp.v2beta1.CharacterMaskConfig; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.CryptoKey; -import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig; -import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; -import com.google.privacy.dlp.v2beta1.DeidentifyConfig; -import com.google.privacy.dlp.v2beta1.DeidentifyContentRequest; -import com.google.privacy.dlp.v2beta1.DeidentifyContentResponse; -import com.google.privacy.dlp.v2beta1.InfoTypeTransformations; -import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; -import com.google.privacy.dlp.v2beta1.KmsWrappedCryptoKey; -import com.google.privacy.dlp.v2beta1.PrimitiveTransformation; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.CharacterMaskConfig; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.CryptoKey; +import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig; +import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import com.google.privacy.dlp.v2.DateShiftConfig; +import com.google.privacy.dlp.v2.DeidentifyConfig; +import com.google.privacy.dlp.v2.DeidentifyContentRequest; +import com.google.privacy.dlp.v2.DeidentifyContentResponse; +import com.google.privacy.dlp.v2.FieldId; 
+import com.google.privacy.dlp.v2.FieldTransformation; +import com.google.privacy.dlp.v2.InfoTypeTransformations; +import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation; +import com.google.privacy.dlp.v2.KmsWrappedCryptoKey; +import com.google.privacy.dlp.v2.PrimitiveTransformation; +import com.google.privacy.dlp.v2.RecordTransformations; +import com.google.privacy.dlp.v2.Table; +import com.google.privacy.dlp.v2.Value; import com.google.protobuf.ByteString; +import com.google.type.Date; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -42,6 +65,17 @@ public class DeIdentification { + /** + * [START dlp_deidentify_mask] + * + *

Deidentify a string by masking sensitive information with a character using the DLP API. + * + * @param string The string to deidentify. + * @param maskingCharacter (Optional) The character to mask sensitive data with. + * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. + * Omitting this value or setting it to 0 masks all sensitive chars. + * @param projectId ID of Google Cloud project to run the API under. + */ private static void deIdentifyWithMask( String string, Character maskingCharacter, @@ -62,12 +96,14 @@ private static void deIdentifyWithMask( // numberToMask = 5; // maskingCharacter = 'x'; - ContentItem contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setValue(string) + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFrom(string, StandardCharsets.UTF_8)) .build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + CharacterMaskConfig characterMaskConfig = CharacterMaskConfig.newBuilder() .setMaskingCharacter(maskingCharacter.toString()) @@ -76,9 +112,7 @@ private static void deIdentifyWithMask( // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = - PrimitiveTransformation.newBuilder() - .setCharacterMaskConfig(characterMaskConfig) - .build(); + PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build(); InfoTypeTransformation infoTypeTransformationObject = InfoTypeTransformation.newBuilder() @@ -98,8 +132,9 @@ private static void deIdentifyWithMask( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() + .setParent(projectId) .setDeidentifyConfig(deidentifyConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Execute the deidentification request @@ -107,27 +142,32 @@ private static void deIdentifyWithMask( // Print the character-masked input 
value // e.g. "My SSN is 123456789" --> "My SSN is *********" - for (ContentItem item : response.getItemsList()) { - System.out.println(item.getValue()); - } + ContentItem item = response.getItem(); + System.out.println(item.getValue()); } catch (Exception e) { System.out.println("Error in deidentifyWithMask: " + e.getMessage()); } - // [END dlp_deidentify_masking] } + // [END dlp_deidentify_mask] + /** + * [START dlp_deidentify_fpe] + * + *

Deidentify a string by encrypting sensitive information while preserving format. + * + * @param string The string to deidentify. + * @param alphabet The set of characters to use when encrypting the input. For more information, + * see cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify + * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. + * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. + * @param projectId ID of Google Cloud project to run the API under. + */ private static void deIdentifyWithFpe( - String string, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { - // [START dlp_deidentify_fpe] - /** - * Deidentify a string by encrypting sensitive information while preserving format. - * @param string The string to deidentify. - * @param alphabet The set of characters to use when encrypting the input. For more information, - * see cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify - * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. - * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. 
- */ - + String string, + FfxCommonNativeAlphabet alphabet, + String keyName, + String wrappedKey, + String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -136,12 +176,14 @@ private static void deIdentifyWithFpe( // keyName = "projects/GCP_PROJECT/locations/REGION/keyRings/KEYRING_ID/cryptoKeys/KEY_NAME"; // wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" - ContentItem contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setValue(string) + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFrom(string, StandardCharsets.UTF_8)) .build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + // Create the format-preserving encryption (FPE) configuration KmsWrappedCryptoKey kmsWrappedCryptoKey = KmsWrappedCryptoKey.newBuilder() @@ -149,10 +191,7 @@ private static void deIdentifyWithFpe( .setCryptoKeyName(keyName) .build(); - CryptoKey cryptoKey = - CryptoKey.newBuilder() - .setKmsWrapped(kmsWrappedCryptoKey) - .build(); + CryptoKey cryptoKey = CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build(); CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = CryptoReplaceFfxFpeConfig.newBuilder() @@ -184,8 +223,9 @@ private static void deIdentifyWithFpe( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() + .setParent(projectId) .setDeidentifyConfig(deidentifyConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Execute the deidentification request @@ -193,30 +233,229 @@ private static void deIdentifyWithFpe( // Print the deidentified input value // e.g. 
"My SSN is 123456789" --> "My SSN is 7261298621" - for (ContentItem item : response.getItemsList()) { - System.out.println(item.getValue()); - } + ContentItem item = response.getItem(); + System.out.println(item.getValue()); } catch (Exception e) { System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); } - // [END dlp_deidentify_fpe] } + // [END dlp_deidentify_fpe] + + /** + * [START dlp_deidentify_date_shift] + * + * @param inputCsvPath The path to the CSV file to deidentify + * @param outputCsvPath (Optional) path to the output CSV file + * @param dateFields The list of (date) fields in the CSV file to date shift + * @param lowerBoundDays The maximum number of days to shift a date backward + * @param upperBoundDays The maximum number of days to shift a date forward + * @param contextFieldId (Optional) The column to determine date shift, default : a random shift + * amount + * @param wrappedKey (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates + * @param keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 + * key + * @param projectId ID of Google Cloud project to run the API under. 
+ */ + private static void deidentifyWithDateShift( + Path inputCsvPath, + Path outputCsvPath, + String[] dateFields, + int lowerBoundDays, + int upperBoundDays, + String contextFieldId, + String wrappedKey, + String keyName, + String projectId) + throws Exception { + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // Set the maximum days to shift a day backward (lowerbound), forward (upperbound) + DateShiftConfig.Builder dateShiftConfigBuilder = + DateShiftConfig.newBuilder() + .setLowerBoundDays(lowerBoundDays) + .setUpperBoundDays(upperBoundDays); + + // (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set + // String keyName = + // 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates + // This key should be encrypted using the Cloud KMS key specified above + // If this is specified, then 'keyName' and 'contextFieldId' must also be set + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // If contextFieldId , keyName or wrappedKey is set : all three arguments must be valid + if (contextFieldId != null && keyName != null && wrappedKey != null) { + dateShiftConfigBuilder.setContext(FieldId.newBuilder().setName(contextFieldId).build()); + KmsWrappedCryptoKey kmsWrappedCryptoKey = + KmsWrappedCryptoKey.newBuilder() + .setCryptoKeyName(keyName) + .setWrappedKey(ByteString.copyFromUtf8(wrappedKey)) + .build(); + dateShiftConfigBuilder.setCryptoKey( + CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build()); + + } else if (contextFieldId != null || keyName != null || wrappedKey != null) { + throw new IllegalArgumentException( + "You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!"); + } + + DateShiftConfig dateShiftConfig = 
dateShiftConfigBuilder.build(); + + // Read and parse the CSV file + // The first row of the file must specify column names, and all other rows + // Path inputCsvFile = Paths.get("/path/to/file.csv"); + BufferedReader br = null; + String line; + List rows = new ArrayList<>(); + List headers; + + br = new BufferedReader(new FileReader(inputCsvPath.toFile())); + + // convert csv header to FieldId + headers = + Arrays.stream(br.readLine().split(",")) + .map(header -> FieldId.newBuilder().setName(header).build()) + .collect(Collectors.toList()); + + while ((line = br.readLine()) != null) { + // convert csv rows to Table.Row + rows.add(convertCsvRowToTableRow(line)); + } + br.close(); + + Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build(); + + List dateFieldIds = + Arrays.stream(dateFields) + .map(field -> FieldId.newBuilder().setName(field).build()) + .collect(Collectors.toList()); + + FieldTransformation fieldTransformation = + FieldTransformation.newBuilder() + .addAllFields(dateFieldIds) + .setPrimitiveTransformation( + PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build()) + .build(); + + DeidentifyConfig deidentifyConfig = + DeidentifyConfig.newBuilder() + .setRecordTransformations( + RecordTransformations.newBuilder() + .addFieldTransformations(fieldTransformation) + .build()) + .build(); + + ContentItem tableItem = ContentItem.newBuilder().setTable(table).build(); + + DeidentifyContentRequest request = + DeidentifyContentRequest.newBuilder() + .setParent(projectId) + .setDeidentifyConfig(deidentifyConfig) + .setItem(tableItem) + .build(); + + // Execute the deidentification request + DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + + // Write out the response as a CSV file + List outputHeaderFields = response.getItem().getTable().getHeadersList(); + List outputRows = response.getItem().getTable().getRowsList(); + + List outputHeaders = + 
outputHeaderFields.stream().map(FieldId::getName).collect(Collectors.toList()); + + File outputFile = outputCsvPath.toFile(); + if (!outputFile.exists()) { + outputFile.mkdirs(); + outputFile.createNewFile(); + } + BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputFile)); + + // write out headers + bufferedWriter.append(String.join(",", outputHeaders) + "\n"); + + // write out each row + for (Table.Row outputRow : outputRows) { + String row = + outputRow + .getValuesList() + .stream() + .map( + value -> + (value.getDateValue() != null) + ? (String.valueOf(value.getDateValue().getMonth()) + + "/" + + String.valueOf(value.getDateValue().getDay()) + + "/" + + String.valueOf(value.getDateValue().getYear())) + : value.getStringValue()) + .collect(Collectors.joining(",")); + bufferedWriter.append(row + "\n"); + } + + bufferedWriter.flush(); + bufferedWriter.close(); + + System.out.println("Successfully saved date-shift output to:" + outputCsvPath.getFileName()); + } + } + + // Parse string to valid date, return null when invalid + private static LocalDate getValidDate(String dateString) { + try { + return LocalDate.parse(dateString); + } catch (DateTimeParseException e) { + return null; + } + } + + // convert CSV row into Table.Row + private static Table.Row convertCsvRowToTableRow(String row) { + String[] values = row.split(","); + Table.Row.Builder tableRowBuilder = Table.Row.newBuilder(); + for (String value : values) { + LocalDate date = getValidDate(value); + if (date != null) { + // convert to com.google.type.Date + Date dateValue = + Date.newBuilder() + .setYear(date.getYear()) + .setMonth(date.getMonthValue()) + .setDay(date.getDayOfMonth()) + .build(); + Value tableValue = Value.newBuilder().setDateValue(dateValue).build(); + tableRowBuilder.addValues(tableValue); + } else { + tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build()); + } + } + return tableRowBuilder.build(); + } + // [END dlp_deidentify_date_shift] /** - 
* Command line application to de-identify data using the Data Loss Prevention API. - * Supported data format: strings + * Command line application to de-identify data using the Data Loss Prevention API. Supported data + * format: strings */ public static void main(String[] args) throws Exception { OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); - Option deidentifyMaskingOption = new Option("m", "mask", true, "deid with character masking"); + Option deidentifyMaskingOption = + new Option("m", "mask", true, "Deidentify with character masking"); optionsGroup.addOption(deidentifyMaskingOption); - Option deidentifyFpeOption = new Option("f", "fpe", true, "deid with FFX FPE"); + Option deidentifyFpeOption = new Option("f", "fpe", true, "Deidentify with FFX FPE"); optionsGroup.addOption(deidentifyFpeOption); + Option deidentifyDateShiftOption = + new Option( + "d", "date", true, "Deidentify dates in a CSV file by pseudorandomly shifting them."); + Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); @@ -224,22 +463,43 @@ public static void main(String[] args) throws Exception { Option.builder("maskingCharacter").hasArg(true).required(false).build(); commandLineOptions.addOption(maskingCharacterOption); - Option numberToMaskOption = - Option.builder("numberToMask").hasArg(true).required(false).build(); + Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); commandLineOptions.addOption(numberToMaskOption); - Option alphabetOption = - Option.builder("commonAlphabet").hasArg(true).required(false).build(); + Option alphabetOption = Option.builder("commonAlphabet").hasArg(true).required(false).build(); commandLineOptions.addOption(alphabetOption); - Option wrappedKeyOption = - Option.builder("wrappedKey").hasArg(true).required(false).build(); + Option wrappedKeyOption = Option.builder("wrappedKey").hasArg(true).required(false).build(); 
commandLineOptions.addOption(wrappedKeyOption); - Option keyNameOption = - Option.builder("keyName").hasArg(true).required(false).build(); + Option keyNameOption = Option.builder("keyName").hasArg(true).required(false).build(); commandLineOptions.addOption(keyNameOption); + Option inputCsvPathOption = Option.builder("inputCsvPath").hasArg(true).required(false).build(); + commandLineOptions.addOption(inputCsvPathOption); + + Option outputCsvPathOption = + Option.builder("outputCsvPath").hasArg(true).required(false).build(); + commandLineOptions.addOption(outputCsvPathOption); + + Option dateFieldsOption = Option.builder("dateFields").hasArg(true).required(false).build(); + commandLineOptions.addOption(dateFieldsOption); + + Option lowerBoundDaysOption = + Option.builder("lowerBoundDays").hasArg(true).required(false).build(); + commandLineOptions.addOption(lowerBoundDaysOption); + + Option upperBoundDaysOption = + Option.builder("upperBoundDays").hasArg(true).required(false).build(); + commandLineOptions.addOption(upperBoundDaysOption); + + Option contextFieldNameOption = + Option.builder("contextField").hasArg(true).required(false).build(); + commandLineOptions.addOption(contextFieldNameOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -253,12 +513,16 @@ public static void main(String[] args) throws Exception { return; } + // default to auto-detected project id when not explicitly provided + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + if (cmd.hasOption("m")) { // deidentification with character masking int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); String 
val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); - deIdentifyWithMask(val, maskingCharacter, numberToMask); + deIdentifyWithMask(val, maskingCharacter, numberToMask, projectId); } else if (cmd.hasOption("f")) { // deidentification with FPE String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); @@ -268,7 +532,31 @@ public static void main(String[] args) throws Exception { FfxCommonNativeAlphabet.valueOf( cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); - deIdentifyWithFpe(val, alphabet, keyName, wrappedKey); + deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId); + } // deidentify with date shift + else if (cmd.hasOption("d")) { + String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt()); + String outputCsv = cmd.getOptionValue(outputCsvPathOption.getOpt()); + + String contextField = cmd.getOptionValue(contextFieldNameOption.getOpt(), null); + String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt(), null); + String keyName = cmd.getOptionValue(keyNameOption.getOpt(), null); + + String[] dateFields = cmd.getOptionValue(dateFieldsOption.getOpt(), "").split(","); + + int lowerBoundsDay = Integer.valueOf(cmd.getOptionValue(lowerBoundDaysOption.getOpt())); + int upperBoundsDay = Integer.valueOf(cmd.getOptionValue(upperBoundDaysOption.getOpt())); + + deidentifyWithDateShift( + Paths.get(inputCsv), + Paths.get(outputCsv), + dateFields, + lowerBoundsDay, + upperBoundsDay, + contextField, + wrappedKey, + keyName, + projectId); } } } diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 1d2edf45027..b9cabcbf23c 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -16,37 +16,40 @@ package com.example.dlp; -import com.google.api.gax.longrunning.OperationFuture; +import com.google.api.core.SettableApiFuture; import com.google.cloud.ServiceOptions; -import 
com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.BigQueryOptions; -import com.google.privacy.dlp.v2beta1.BigQueryTable; -import com.google.privacy.dlp.v2beta1.CloudStorageOptions; -import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.DatastoreOptions; -import com.google.privacy.dlp.v2beta1.Finding; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.InspectContentRequest; -import com.google.privacy.dlp.v2beta1.InspectContentResponse; -import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; -import com.google.privacy.dlp.v2beta1.InspectOperationResult; -import com.google.privacy.dlp.v2beta1.InspectResult; -import com.google.privacy.dlp.v2beta1.KindExpression; -import com.google.privacy.dlp.v2beta1.Likelihood; -import com.google.privacy.dlp.v2beta1.OutputStorageConfig; -import com.google.privacy.dlp.v2beta1.PartitionId; -import com.google.privacy.dlp.v2beta1.ResultName; -import com.google.privacy.dlp.v2beta1.StorageConfig; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.pubsub.v1.Subscriber; +import com.google.privacy.dlp.v2.Action; +import com.google.privacy.dlp.v2.BigQueryOptions; +import com.google.privacy.dlp.v2.BigQueryTable; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.CloudStorageOptions; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.CreateDlpJobRequest; +import com.google.privacy.dlp.v2.DatastoreOptions; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.Finding; +import com.google.privacy.dlp.v2.GetDlpJobRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InfoTypeStats; +import com.google.privacy.dlp.v2.InspectConfig; +import 
com.google.privacy.dlp.v2.InspectContentRequest; +import com.google.privacy.dlp.v2.InspectContentResponse; +import com.google.privacy.dlp.v2.InspectDataSourceDetails; +import com.google.privacy.dlp.v2.InspectJobConfig; +import com.google.privacy.dlp.v2.InspectResult; +import com.google.privacy.dlp.v2.KindExpression; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.PartitionId; +import com.google.privacy.dlp.v2.StorageConfig; import com.google.protobuf.ByteString; +import com.google.pubsub.v1.ProjectSubscriptionName; import java.net.URLConnection; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.ExecutionException; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -59,232 +62,302 @@ public class Inspect { + /** + * [START dlp_inspect_string] Inspect a text for given InfoTypes + * + * @param string String to inspect + * @param minLikelihood The minimum likelihood required before returning a match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param infoTypes The infoTypes of information to match + * @param includeQuote Whether to include the matching string + */ private static void inspectString( String string, Likelihood minLikelihood, int maxFindings, List infoTypes, boolean includeQuote) { - // [START dlp_inspect_string] // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - - // Whether to include the matching string - // 
includeQuote = true; + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); // The string to inspect // string = 'My name is Gary and my email is gary@example.com'; - ContentItem contentItem = - ContentItem.newBuilder().setType("text/plain").setValue(string).build(); + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFromUtf8(string)) + .build(); + + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); InspectContentRequest request = InspectContentRequest.newBuilder() .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); InspectContentResponse response = dlpServiceClient.inspectContent(request); - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + if (response.getResult().getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : response.getResult().getFindingsList()) { + if (includeQuote) { + System.out.print("Quote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { System.out.println("Error in 
inspectString: " + e.getMessage()); } - // [END dlp_inspect_string] } + // [END dlp_inspect_string] + /** + * [START dlp_inspect_file] + * + * @param filePath The path to a local file to inspect. Can be a text, JPG, or PNG file. + * @param minLikelihood The minimum likelihood required before returning a match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param infoTypes The infoTypes of information to match + * @param includeQuote Whether to include the matching string + */ private static void inspectFile( String filePath, Likelihood minLikelihood, int maxFindings, List infoTypes, boolean includeQuote) { - // [START dlp_inspect_file] // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The path to a local file to inspect. Can be a text, JPG, or PNG file. - // fileName = 'path/to/image.png'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - - // Whether to include the matching string - // includeQuote = true; - Path path = Paths.get(filePath); - // detect file mime type, default to application/octet-stream String mimeType = URLConnection.guessContentTypeFromName(filePath); if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - if (mimeType == null) { - mimeType = "application/octet-stream"; + ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.TEXT_UTF8; + + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = 
ByteContentItem.BytesType.IMAGE_SVG; + break; } - byte[] data = Files.readAllBytes(path); - ContentItem contentItem = - ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build(); + ByteContentItem byteContentItem = ByteContentItem.newBuilder().setType(bytesType).build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); InspectContentRequest request = InspectContentRequest.newBuilder() .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); + InspectContentResponse response = dlpServiceClient.inspectContent(request); - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + InspectResult result = response.getResult(); + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("Quote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { e.printStackTrace(); System.out.println("Error in inspectFile: " + e.getMessage()); } - // [END 
dlp_inspect_file] } + // [END dlp_inspect_file] + /** + * [START dlp_inspect_gcs] + * + *

Inspect GCS file for Info types and wait on job completion using Google Cloud Pub/Sub + * notification + * + * @param bucketName The name of the bucket where the file resides. + * @param fileName The path to the file within the bucket to inspect (can include wildcards, eg. + * my-image.*) + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param topicId Google Cloud Pub/Sub topic Id to notify of job status + * @param subscriptionId Google Cloud Subscription to above topic to listen for job status updates + * @param projectId Google Cloud project ID + */ private static void inspectGcsFile( - String bucketName, String fileName, Likelihood minLikelihood, List infoTypes) + String bucketName, + String fileName, + Likelihood minLikelihood, + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId, + String projectId) throws Exception { - // [START dlp_inspect_gcs] // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The name of the bucket where the file resides. - // bucketName = 'YOUR-BUCKET'; - - // The path to the file within the bucket to inspect. - // Can contain wildcards, e.g. 
"my-image.*" - // fileName = 'my-image.png'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; CloudStorageOptions cloudStorageOptions = CloudStorageOptions.newBuilder() - .setFileSet(FileSet.newBuilder().setUrl("gs://" + bucketName + "/" + fileName)) + .setFileSet( + CloudStorageOptions.FileSet.newBuilder() + .setUrl("gs://" + bucketName + "/" + fileName)) .build(); StorageConfig storageConfig = StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); + + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // asynchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(projectId) + .setInspectJob(inspectJobConfig) .build(); - // optionally provide an output configuration to store results, default : none - OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + + System.out.println("Job created with ID:" + dlpJob.getName()); - // 
asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); - // ... - // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + DlpJob completedJob = + dlpServiceClient.getDlpJob( + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); - if (inspectResult.getFindingsCount() > 0) { + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectGCSFileAsync: " + e.getMessage()); } // [END dlp_inspect_gcs] } + // [START wait_on_dlp_job_completion] + // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber + private static void waitOnJobCompletion( + String projectId, String subscriptionId, String dlpJobName) + throws InterruptedException, ExecutionException { + // wait for job completion + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a 
Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + ackReplyConsumer.ack(); + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + } + }) + .build(); + + // wait for job completion + done.get(); + } + // [END wait_on_dlp_job_completion] + + // [START dlp_inspect_datastore] + /** + * Inspect a Datastore kind + * + * @param projectId The project ID containing the target Datastore + * @param namespaceId The ID namespace of the Datastore document to inspect + * @param kind The kind of the Datastore entity to inspect + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings max number of findings + * @param topicId Google Cloud Pub/Sub topic to notify job status updates + * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status + * updates + * @throws Exception + */ private static void inspectDatastore( String projectId, String namespaceId, String kind, Likelihood minLikelihood, - List infoTypes) { - // [START dlp_inspect_datastore] + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId) + throws Exception { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID containing the target Datastore - // projectId = my-project-id - - // (Optional) The ID namespace of the Datastore document to inspect. - // To ignore Datastore namespaces, set this to an empty string ('') - // namespaceId = ''; - - // The kind of the Datastore entity to inspect. 
- // kind = 'Person'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - // Reference to the Datastore namespace PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId).setNamespaceId(namespaceId).build(); @@ -298,123 +371,161 @@ private static void inspectDatastore( StorageConfig storageConfig = StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build(); + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); + + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // asynchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(projectId) + .setInspectJob(inspectJobConfig) .build(); - // optionally provide an output configuration to store results, default : none - OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); - // asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + System.out.println("Job created with ID:" + dlpJob.getName()); + // asynchronously 
submit an inspect job, and wait on results + waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); - // ... - // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + DlpJob completedJob = + dlpServiceClient.getDlpJob( + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); - if (inspectResult.getFindingsCount() > 0) { + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectDatastore: " + e.getMessage()); } - // [END dlp_inspect_datastore] } + // [END dlp_inspect_datastore] + /** + * [START dlp_inspect_bigquery] + * + * @param projectId The project ID to run the API call under + * @param datasetId The ID of the dataset to inspect, e.g. 'my_dataset' + * @param tableId The ID of the table to inspect, e.g. 
'my_table' + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings + * @param topicId + * @param subscriptionId + * @throws Exception + */ private static void inspectBigquery( - String projectId, - String datasetId, - String tableId, - Likelihood minLikelihood, - List infoTypes) { - // [START dlp_inspect_bigquery] + String projectId, + String datasetId, + String tableId, + Likelihood minLikelihood, + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId) + throws Exception { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // (Optional) The project ID to run the API call under - // projectId = my-project-id - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // datasetId = "my_dataset"; - - // The ID of the table to inspect, e.g. 'my_table' - // tableId = "my_table"; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - // Reference to the BigQuery table BigQueryTable tableReference = - BigQueryTable.newBuilder() - .setProjectId(projectId) - .setDatasetId(datasetId) - .setTableId(tableId) - .build(); + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); BigQueryOptions bigQueryOptions = - BigQueryOptions.newBuilder() - .setTableReference(tableReference) - .build(); + BigQueryOptions.newBuilder().setTableReference(tableReference).build(); // Construct BigQuery configuration to be inspected StorageConfig storageConfig = - StorageConfig.newBuilder() - .setBigQueryOptions(bigQueryOptions) - .build(); + StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); + + InspectConfig.FindingLimits findingLimits = + 
InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = - InspectConfig.newBuilder() - .addAllInfoTypes(infoTypes) - .setMinLikelihood(minLikelihood) - .build(); - - // optionally provide an output configuration to store results, default : none - OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); - - // asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync( - inspectConfig, storageConfig, outputConfig); - - // ... - // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); - - if (inspectResult.getFindingsCount() > 0) { + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); + + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // asynchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(projectId) + .setInspectJob(inspectJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + + System.out.println("Job created with ID:" + dlpJob.getName()); + + // wait on completion + waitOnJobCompletion(dlpJob.getName(), projectId, subscriptionId); + + DlpJob completedJob = + dlpServiceClient.getDlpJob( + 
GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); + + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectBigguery: " + e.getMessage()); } - // [END dlp_inspect_bigquery] } + // [END dlp_inspect_bigquery] /** - * Command line application to inspect data using the Data Loss Prevention API. - * Supported data formats: string, file, text file on GCS, BigQuery table, and Datastore entity + * Command line application to inspect data using the Data Loss Prevention API. 
Supported data + * formats: string, file, text file on GCS, BigQuery table, and Datastore entity */ public static void main(String[] args) throws Exception { @@ -466,10 +577,16 @@ public static void main(String[] args) throws Exception { Option tableIdOption = Option.builder("tableId").hasArg(true).required(false).build(); commandLineOptions.addOption(tableIdOption); - Option projectIdOption = - Option.builder("projectId").hasArg(true).required(false).build(); + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); commandLineOptions.addOption(projectIdOption); + Option topicIdOption = Option.builder("topicId").hasArg(true).required(false).build(); + commandLineOptions.addOption(topicIdOption); + + Option subscriptionIdOption = + Option.builder("subscriptionId").hasArg(true).required(false).build(); + commandLineOptions.addOption(subscriptionIdOption); + Option datastoreNamespaceOption = Option.builder("namespace").hasArg(true).required(false).build(); commandLineOptions.addOption(datastoreNamespaceOption); @@ -498,6 +615,11 @@ public static void main(String[] args) throws Exception { boolean includeQuote = Boolean.parseBoolean(cmd.getOptionValue(includeQuoteOption.getOpt(), "true")); + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + String topicId = cmd.getOptionValue(topicIdOption.getOpt()); + String subscriptionId = cmd.getOptionValue(subscriptionIdOption.getOpt()); + List infoTypesList = Collections.emptyList(); if (cmd.hasOption(infoTypesOption.getOpt())) { infoTypesList = new ArrayList<>(); @@ -517,24 +639,42 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("gcs")) { String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); String fileName = cmd.getOptionValue(gcsFileNameOption.getOpt()); - inspectGcsFile(bucketName, fileName, minLikelihood, infoTypesList); + inspectGcsFile( + bucketName, + fileName, + minLikelihood, + 
infoTypesList, + maxFindings, + topicId, + subscriptionId, + projectId); // datastore kind inspection } else if (cmd.hasOption("ds")) { String namespaceId = cmd.getOptionValue(datastoreNamespaceOption.getOpt(), ""); String kind = cmd.getOptionValue(datastoreKindOption.getOpt()); // use default project id when project id is not specified - String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); - inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList); + inspectDatastore( + projectId, + namespaceId, + kind, + minLikelihood, + infoTypesList, + maxFindings, + topicId, + subscriptionId); } else if (cmd.hasOption("bq")) { String datasetId = cmd.getOptionValue(datasetIdOption.getOpt()); String tableId = cmd.getOptionValue(tableIdOption.getOpt()); // use default project id when project id is not specified - String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); - inspectBigquery(projectId, datasetId, tableId, minLikelihood, infoTypesList); + inspectBigquery( + projectId, + datasetId, + tableId, + minLikelihood, + infoTypesList, + maxFindings, + topicId, + subscriptionId); } } } diff --git a/dlp/src/main/java/com/example/dlp/Jobs.java b/dlp/src/main/java/com/example/dlp/Jobs.java new file mode 100644 index 00000000000..f2b4b2c106b --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Jobs.java @@ -0,0 +1,145 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.example.dlp; + +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.DeleteDlpJobRequest; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.DlpJobType; +import com.google.privacy.dlp.v2.ListDlpJobsRequest; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class Jobs { + + private static void listJobs(String projectId, String filter, DlpJobType jobType) + throws Exception { + /** + * [START dlp_list_jobs] + * + * List DLP jobs + * + * @param projectId The project ID to run the API call under + * @param filter The filter expression to use, eg. 
state=DONE For more information on filter + * syntax see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list + * @param jobType The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') + */ + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ListDlpJobsRequest listDlpJobsRequest = + ListDlpJobsRequest.newBuilder() + .setParent(projectId) + .setFilter(filter) + .setType(jobType) + .build(); + DlpServiceClient.ListDlpJobsPagedResponse response = + dlpServiceClient.listDlpJobs(listDlpJobsRequest); + for (DlpJob dlpJob : response.getPage().getValues()) { + System.out.println("Job name: " + dlpJob.getState()); + System.out.println("Job state: " + dlpJob.getState()); + } + } + } + // [END dlp_list_jobs] + + /** + * [START dlp_delete_job] + * + * Delete a DLP Job + * + * @param projectId Google Cloud ProjectID + * @param jobId DLP Job ID + */ + private static void deleteJob(String projectId, String jobId) { + + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + // construct complete job name + String jobName = String.format("projects/%s/dlpJobs/%s", projectId, jobId); + + DeleteDlpJobRequest deleteDlpJobRequest = + DeleteDlpJobRequest.newBuilder().setName(jobName).build(); + + // submit job deletion request + dlpServiceClient.deleteDlpJob(deleteDlpJobRequest); + } catch (Exception e) { + System.err.println("Error deleting DLP job: " + e.getMessage()); + } + // [END dlp_delete_job] + } + + /** Command line application to list and delete DLP jobs the Data Loss Prevention API. 
*/ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + Option listOption = new Option("l", "list", true, "List DLP Jobs"); + optionsGroup.addOption(listOption); + + Option deleteOption = new Option("d", "delete", true, "Delete DLP Jobs"); + optionsGroup.addOption(deleteOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option filterOption = Option.builder("filter").hasArg(true).required(false).build(); + commandLineOptions.addOption(filterOption); + + Option jobTypeOption = Option.builder("jobType").hasArg(true).required(false).build(); + commandLineOptions.addOption(jobTypeOption); + + Option jobIdOption = Option.builder("jobId").hasArg(true).required(false).build(); + commandLineOptions.addOption(jobIdOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Inspect.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + if (cmd.hasOption(listOption.getOpt())) { + String filter = cmd.getOptionValue(filterOption.getOpt(), ""); + DlpJobType jobType = + DlpJobType.valueOf( + cmd.getOptionValue( + jobTypeOption.getOpt(), DlpJobType.DLP_JOB_TYPE_UNSPECIFIED.name())); + listJobs(projectId, filter, jobType); + } + + if (cmd.hasOption(deleteOption.getOpt())) { + String jobId = cmd.getOptionValue(jobIdOption.getOpt()); + deleteJob(projectId, jobId); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java 
b/dlp/src/main/java/com/example/dlp/Metadata.java index 12702284108..49247eb5843 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -16,11 +16,10 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.CategoryDescription; -import com.google.privacy.dlp.v2beta1.InfoTypeDescription; -import com.google.privacy.dlp.v2beta1.ListInfoTypesResponse; -import com.google.privacy.dlp.v2beta1.ListRootCategoriesResponse; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.InfoTypeDescription; +import com.google.privacy.dlp.v2.ListInfoTypesRequest; +import com.google.privacy.dlp.v2.ListInfoTypesResponse; import java.util.List; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -32,13 +31,16 @@ public class Metadata { - private static void listInfoTypes(String category, String languageCode) throws Exception { + private static void listInfoTypes(String filter, String languageCode) throws Exception { // [START dlp_list_info_types] // Instantiate a DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { // The category of info types to list, e.g. category = 'GOVERNMENT'; // Optional BCP-47 language code for localized info type friendly names, e.g. 
'en-US' - ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(category, languageCode); + // filter supported_by=INSPECT + ListInfoTypesRequest listInfoTypesRequest = + ListInfoTypesRequest.newBuilder().setFilter(filter).setLanguageCode(languageCode).build(); + ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(listInfoTypesRequest); List infoTypeDescriptions = infoTypesResponse.getInfoTypesList(); for (InfoTypeDescription infoTypeDescription : infoTypeDescriptions) { System.out.println("Name : " + infoTypeDescription.getName()); @@ -67,13 +69,11 @@ private static void listRootCategories(String languageCode) throws Exception { /** Retrieve infoTypes. */ public static void main(String[] args) throws Exception { Options options = new Options(); - Option languageCodeOption = new Option("language", null, true, "BCP-47 language code"); - languageCodeOption.setRequired(false); + Option languageCodeOption = Option.builder("language").hasArg(true).required(true).build(); options.addOption(languageCodeOption); - Option categoryOption = new Option("category", null, true, "Category of info types to list."); - categoryOption.setRequired(false); - options.addOption(categoryOption); + Option filterOption = Option.builder("filter").hasArg(true).required(false).build(); + options.addOption(filterOption); CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); @@ -87,11 +87,8 @@ public static void main(String[] args) throws Exception { return; } String languageCode = cmd.getOptionValue(languageCodeOption.getOpt(), "en-US"); - if (cmd.hasOption(categoryOption.getOpt())) { - String category = cmd.getOptionValue(categoryOption.getOpt()); - listInfoTypes(category, languageCode); - } else { - listRootCategories(languageCode); - } + String filter = cmd.getOptionValue(filterOption.getOpt(), ""); + + listInfoTypes(languageCode, filter); } } diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java 
b/dlp/src/main/java/com/example/dlp/QuickStart.java index 941a5fb71e8..375a3148823 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -16,15 +16,17 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.Finding; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.InspectContentRequest; -import com.google.privacy.dlp.v2beta1.InspectContentResponse; -import com.google.privacy.dlp.v2beta1.InspectResult; -import com.google.privacy.dlp.v2beta1.Likelihood; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.Finding; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectContentRequest; +import com.google.privacy.dlp.v2.InspectContentResponse; +import com.google.privacy.dlp.v2.InspectResult; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.List; @@ -56,40 +58,45 @@ public static void main(String[] args) throws Exception { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerItem(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); - ContentItem contentItem = - ContentItem.newBuilder().setType("text/plain").setValue(text).build(); + ByteContentItem byteContentItem = + 
ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFromUtf8(text)) + .build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); InspectContentRequest request = InspectContentRequest.newBuilder() .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Inspect the text for info types InspectContentResponse response = dlpServiceClient.inspectContent(request); - // Print the response - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + InspectResult result = response.getResult(); + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("Quote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { System.out.println("Error in inspectString: " + e.getMessage()); diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index 0e738ca36b5..63ff2e5e5e5 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -16,100 +16,65 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.InfoType; -import 
com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.Likelihood; -import com.google.privacy.dlp.v2beta1.RedactContentRequest; -import com.google.privacy.dlp.v2beta1.RedactContentRequest.ImageRedactionConfig; -import com.google.privacy.dlp.v2beta1.RedactContentRequest.ReplaceConfig; -import com.google.privacy.dlp.v2beta1.RedactContentResponse; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.RedactImageRequest; +import com.google.privacy.dlp.v2.RedactImageResponse; import com.google.protobuf.ByteString; import java.io.FileOutputStream; import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; public class Redact { - private static void redactString( - String string, String replacement, Likelihood minLikelihood, List infoTypes) - throws Exception { - // [START dlp_redact_string] - // Instantiate the DLP client - try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The minimum likelihood required before returning a match - // eg.minLikelihood = LIKELIHOOD_VERY_LIKELY; - InspectConfig inspectConfig = - InspectConfig.newBuilder() - .addAllInfoTypes(infoTypes) - .setMinLikelihood(minLikelihood) - .build(); - - ContentItem 
contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setData(ByteString.copyFrom(string.getBytes())) - .build(); - - List replaceConfigs = new ArrayList<>(); - - if (infoTypes.isEmpty()) { - // replace all detected sensitive elements with replacement string - replaceConfigs.add(ReplaceConfig.newBuilder().setReplaceWith(replacement).build()); - } else { - // Replace select info types with chosen replacement string - for (InfoType infoType : infoTypes) { - replaceConfigs.add( - ReplaceConfig.newBuilder().setInfoType(infoType).setReplaceWith(replacement).build()); - } - } - - RedactContentRequest request = RedactContentRequest.newBuilder() - .setInspectConfig(inspectConfig) - .addAllItems(Collections.singletonList(contentItem)) - .addAllReplaceConfigs(replaceConfigs) - .build(); - - RedactContentResponse contentResponse = dlpClient.redactContent(request); - for (ContentItem responseItem : contentResponse.getItemsList()) { - // print out string with redacted content - System.out.println(responseItem.getData().toStringUtf8()); - } - } - // [END dlp_redact_string] - } - private static void redactImage( - String filePath, Likelihood minLikelihood, List infoTypes, String outputPath) + String filePath, + Likelihood minLikelihood, + List infoTypes, + String outputPath, + String projectId) throws Exception { // [START dlp_redact_image] // Instantiate the DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { // The path to a local file to inspect. Can be a JPG or PNG image file. 
// filePath = 'path/to/image.png' - // detect file mime type, default to application/octet-stream String mimeType = URLConnection.guessContentTypeFromName(filePath); if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - if (mimeType == null) { - mimeType = "application/octet-stream"; + ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; + + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = ByteContentItem.BytesType.IMAGE_SVG; + break; } - byte[] data = Files.readAllBytes(Paths.get(filePath)); // The minimum likelihood required before redacting a match @@ -126,66 +91,65 @@ private static void redactImage( .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) .build(); - ContentItem contentItem = - ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build(); - - List imageRedactionConfigs = new ArrayList<>(); - for (InfoType infoType : infoTypes) { - // clear the specific info type if detected in the image - // use .setRedactionColor to color detected info type without clearing - ImageRedactionConfig imageRedactionConfig = - ImageRedactionConfig.newBuilder().setInfoType(infoType).clearTarget().build(); - imageRedactionConfigs.add(imageRedactionConfig); - } - RedactContentRequest redactContentRequest = - RedactContentRequest.newBuilder() - .setInspectConfig(inspectConfig) + + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(bytesType) + .setData(ByteString.copyFrom(data)) + .build(); + + List imageRedactionConfigs = + infoTypes + .stream() + .map( + infoType -> + RedactImageRequest.ImageRedactionConfig.newBuilder() + .setInfoType(infoType) + .build()) + 
.collect(Collectors.toList()); + + RedactImageRequest redactImageRequest = + RedactImageRequest.newBuilder() + .setParent(projectId) .addAllImageRedactionConfigs(imageRedactionConfigs) - .addItems(contentItem) + .setByteItem(byteContentItem) + .setInspectConfig(inspectConfig) .build(); - RedactContentResponse contentResponse = dlpClient.redactContent(redactContentRequest); - for (ContentItem responseItem : contentResponse.getItemsList()) { - // redacted image data - ByteString redactedImageData = responseItem.getData(); - FileOutputStream outputStream = new FileOutputStream(outputPath); - outputStream.write(redactedImageData.toByteArray()); - outputStream.close(); - } - // [END dlp_redact_image] + RedactImageResponse redactImageResponse = dlpClient.redactImage(redactImageRequest); + + // redacted image data + ByteString redactedImageData = redactImageResponse.getRedactedImage(); + FileOutputStream outputStream = new FileOutputStream(outputPath); + outputStream.write(redactedImageData.toByteArray()); + outputStream.close(); } + // [END dlp_redact_image] } /** Command line application to redact strings, images using the Data Loss Prevention API. 
*/ public static void main(String[] args) throws Exception { - OptionGroup optionsGroup = new OptionGroup(); - optionsGroup.setRequired(true); - Option stringOption = new Option("s", "string", true, "redact string"); - optionsGroup.addOption(stringOption); - - Option fileOption = new Option("f", "file path", true, "redact input file path"); - optionsGroup.addOption(fileOption); Options commandLineOptions = new Options(); - commandLineOptions.addOptionGroup(optionsGroup); Option minLikelihoodOption = Option.builder("minLikelihood").hasArg(true).required(false).build(); commandLineOptions.addOption(minLikelihoodOption); - Option replaceOption = - Option.builder("r").longOpt("replace string").hasArg(true).required(false).build(); - commandLineOptions.addOption(replaceOption); - Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); infoTypesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(infoTypesOption); + Option inputFilePathOption = + Option.builder("o").hasArg(true).longOpt("inputFilePath").required(false).build(); + commandLineOptions.addOption(inputFilePathOption); + Option outputFilePathOption = Option.builder("o").hasArg(true).longOpt("outputFilePath").required(false).build(); commandLineOptions.addOption(outputFilePathOption); + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -199,8 +163,6 @@ public static void main(String[] args) throws Exception { return; } - String replacement = cmd.getOptionValue(replaceOption.getOpt(), "_REDACTED_"); - List infoTypesList = new ArrayList<>(); String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); if (infoTypes != null) { @@ -213,14 +175,10 @@ public static void main(String[] args) throws Exception { cmd.getOptionValue( minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); - // 
string inspection - if (cmd.hasOption("s")) { - String source = cmd.getOptionValue(stringOption.getOpt()); - redactString(source, replacement, minLikelihood, infoTypesList); - } else if (cmd.hasOption("f")) { - String filePath = cmd.getOptionValue(fileOption.getOpt()); - String outputFilePath = cmd.getOptionValue(outputFilePathOption.getOpt()); - redactImage(filePath, minLikelihood, infoTypesList, outputFilePath); - } + String inputFilePath = cmd.getOptionValue(inputFilePathOption.getOpt()); + String outputFilePath = cmd.getOptionValue(outputFilePathOption.getOpt()); + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + redactImage(inputFilePath, minLikelihood, infoTypesList, outputFilePath, projectId); } } diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 853b902c555..fbd78ad5946 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -16,30 +16,37 @@ package com.example.dlp; -import com.google.api.gax.longrunning.OperationFuture; +import com.google.api.core.SettableApiFuture; import com.google.cloud.ServiceOptions; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.longrunning.Operation; -import com.google.privacy.dlp.v2beta1.AnalyzeDataSourceRiskRequest; -import com.google.privacy.dlp.v2beta1.BigQueryTable; -import com.google.privacy.dlp.v2beta1.FieldId; -import com.google.privacy.dlp.v2beta1.PrivacyMetric; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.CategoricalStatsConfig; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.KAnonymityConfig; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.LDiversityConfig; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationMetadata; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult; -import 
com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.CategoricalStatsResult.CategoricalStatsHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; -import com.google.privacy.dlp.v2beta1.Value; -import com.google.privacy.dlp.v2beta1.ValueFrequency; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.pubsub.v1.Subscriber; +import com.google.privacy.dlp.v2.Action; +import com.google.privacy.dlp.v2.Action.PublishToPubSub; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket; +import com.google.privacy.dlp.v2.BigQueryTable; +import com.google.privacy.dlp.v2.CreateDlpJobRequest; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.FieldId; +import com.google.privacy.dlp.v2.GetDlpJobRequest; +import 
com.google.privacy.dlp.v2.PrivacyMetric; +import com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig; +import com.google.privacy.dlp.v2.RiskAnalysisJobConfig; +import com.google.privacy.dlp.v2.Value; +import com.google.privacy.dlp.v2.ValueFrequency; +import com.google.pubsub.v1.ProjectSubscriptionName; import java.util.Arrays; import java.util.List; +import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -53,90 +60,131 @@ public class RiskAnalysis { private static void calculateNumericalStats( - String projectId, String datasetId, String tableId, String columnName) + String projectId, + String datasetId, + String tableId, + String columnName, + String topicId, + String subscriptionId) throws Exception { // [START dlp_numerical_stats] - /** * Calculate numerical statistics for a column in a BigQuery table using the DLP API. + * * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. * @param columnName The name of the column to analyze, which must contain only numerical data. + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. 
*/ // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // columnName = "firstName"; - - FieldId fieldId = - FieldId.newBuilder() - .setColumnName(columnName) - .build(); - - NumericalStatsConfig numericalStatsConfig = - NumericalStatsConfig.newBuilder() - .setField(fieldId) - .build(); - BigQueryTable bigQueryTable = BigQueryTable.newBuilder() - .setProjectId(projectId) - .setDatasetId(datasetId) .setTableId(tableId) + .setDatasetId(datasetId) + .setProjectId(projectId) .build(); + FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); + + NumericalStatsConfig numericalStatsConfig = + NumericalStatsConfig.newBuilder().setField(fieldId).build(); + PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setNumericalStatsConfig(numericalStatsConfig) - .build(); + PrivacyMetric.newBuilder().setNumericalStatsConfig(numericalStatsConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // create /action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setRiskJob(riskAnalysisJobConfig) + .setParent(projectId) + .build(); - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - NumericalStatsResult results = - response.getNumericalStatsResult(); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); - System.out.println( - "Value range: [" + results.getMaxValue() + ", " + results.getMinValue() + "]"); + // wait on job completion + waitOnJobCompletion(projectId, subscriptionId, dlpJobName); - // Print out unique quantiles - String previousValue = ""; - for (int i = 0; i < results.getQuantileValuesCount(); i++) { - Value valueObj = results.getQuantileValues(i); - String value = valueObj.toString(); + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); - if (!previousValue.equals(value)) { - System.out.println("Value at " + i + "% quantile: " + value.toString()); - previousValue = value; - } + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + AnalyzeDataSourceRiskDetails.NumericalStatsResult result = + riskDetails.getNumericalStatsResult(); + + System.out.printf( + "Value range : [%.3f, %.3f]\n", + result.getMinValue().getFloatValue(), result.getMaxValue().getFloatValue()); + + int percent = 1; + for (Value quantileValue : result.getQuantileValuesList()) { + System.out.printf( + "Value at %d \\% quantile : %.3f", percent, quantileValue.getFloatValue()); } - } catch (Exception e) { - System.out.println("Error in numericalStatsAnalysis: " + e.getMessage()); } - // [END dlp_numerical_stats] + } + + private static void waitOnJobCompletion( + String projectId, String subscriptionId, String dlpJobName) + throws InterruptedException, ExecutionException { + // [START wait_on_dlp_job_completion] + // wait for job completion + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on 
the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + ackReplyConsumer.ack(); + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + } + }) + .build(); + + // wait for job completion + done.get(); + // [END wait_on_dlp_job_completion] } private static void calculateCategoricalStats( - String projectId, String datasetId, String tableId, String columnName) + String projectId, + String datasetId, + String tableId, + String columnName, + String topicId, + String subscriptionId) throws Exception { // [START dlp_categorical_stats] /** * Calculate categorical statistics for a column in a BigQuery table using the DLP API. + * * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. 
@@ -151,15 +199,10 @@ private static void calculateCategoricalStats( // tableId = "my_table"; // columnName = "firstName"; - FieldId fieldId = - FieldId.newBuilder() - .setColumnName(columnName) - .build(); + FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); CategoricalStatsConfig categoricalStatsConfig = - CategoricalStatsConfig.newBuilder() - .setField(fieldId) - .build(); + CategoricalStatsConfig.newBuilder().setField(fieldId).build(); BigQueryTable bigQueryTable = BigQueryTable.newBuilder() @@ -169,58 +212,81 @@ private static void calculateCategoricalStats( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setCategoricalStatsConfig(categoricalStatsConfig) - .build(); + PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // create /action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - CategoricalStatsHistogramBucket results = - response.getCategoricalStatsResult().getValueFrequencyHistogramBuckets(0); - - System.out.println( - "Most common value occurs " + results.getValueFrequencyUpperBound() + " time(s)"); - System.out.println( - "Least common value occurs " + results.getValueFrequencyLowerBound() + " time(s)"); - - for (ValueFrequency valueFrequency : results.getBucketValuesList()) { - System.out.println("Value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)." - ); - } + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setRiskJob(riskAnalysisJobConfig) + .setParent(projectId) + .build(); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + // wait on job completion + waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + AnalyzeDataSourceRiskDetails.CategoricalStatsResult result = + riskDetails.getCategoricalStatsResult(); + + for (CategoricalStatsHistogramBucket bucket : + result.getValueFrequencyHistogramBucketsList()) { + System.out.println( + "Most common value occurs " + bucket.getValueFrequencyUpperBound() + " time(s)"); + System.out.println( + "Least common value occurs " + bucket.getValueFrequencyLowerBound() + " time(s)"); + for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) { + System.out.println( + "Value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)."); + } + } } catch (Exception e) { System.out.println("Error in 
categoricalStatsAnalysis: " + e.getMessage()); } - // [END dlp_categorical_stats] } + // [END dlp_categorical_stats_analysis] + // [START dlp_k_anonymity] + /** + * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). + */ private static void calculateKAnonymity( - String projectId, String datasetId, String tableId, List quasiIds) + String projectId, + String datasetId, + String tableId, + List quasiIds, + String topicId, + String subscriptionId) throws Exception { - // [START dlp_k_anonymity] - /** - * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API. - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). 
- */ - // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -232,13 +298,11 @@ private static void calculateKAnonymity( List quasiIdFields = quasiIds .stream() - .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .map(columnName -> FieldId.newBuilder().setName(columnName).build()) .collect(Collectors.toList()); KAnonymityConfig kanonymityConfig = - KAnonymityConfig.newBuilder() - .addAllQuasiIds(quasiIdFields) - .build(); + KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build(); BigQueryTable bigQueryTable = BigQueryTable.newBuilder() @@ -248,83 +312,99 @@ private static void calculateKAnonymity( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setKAnonymityConfig(kanonymityConfig) - .build(); + PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // create /action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); + + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setRiskJob(riskAnalysisJobConfig) + .setParent(projectId) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - KAnonymityHistogramBucket results = - response.getKAnonymityResult().getEquivalenceClassHistogramBuckets(0); - - System.out.println("Bucket size range: [" - + results.getEquivalenceClassSizeLowerBound() - + ", " - + results.getEquivalenceClassSizeUpperBound() - + "]" - ); - - for (KAnonymityEquivalenceClass bucket : results.getBucketValuesList()) { - List quasiIdValues = bucket.getQuasiIdsValuesList() - .stream() - .map(v -> v.toString()) - .collect(Collectors.toList()); - - System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); - System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + // wait on job completion + waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + KAnonymityResult kAnonymityResult = riskDetails.getKAnonymityResult(); + for (KAnonymityHistogramBucket result : + kAnonymityResult.getEquivalenceClassHistogramBucketsList()) { + System.out.println( + "Bucket size range: [" + + result.getEquivalenceClassSizeLowerBound() + + ", " + + result.getEquivalenceClassSizeUpperBound() + + "]"); + + for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) { + List quasiIdValues = + bucket + .getQuasiIdsValuesList() + .stream() + .map(v -> v.toString()) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + } } } catch (Exception e) { System.out.println("Error in kAnonymityAnalysis: 
" + e.getMessage()); } - // [END dlp_k_anonymity] } + // [END dlp_k_anonymity] + /** + * [START dlp_l_diversity] + * + *

Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against + * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). + */ private static void calculateLDiversity( String projectId, String datasetId, String tableId, String sensitiveAttribute, - List quasiIds - ) throws Exception { - // [START dlp_l_diversity] - /** - * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against - * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). 
- */ + List quasiIds, + String topicId, + String subscriptionId) + throws Exception { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // sensitiveAttribute = "name"; - // quasiIds = [{ columnName: "age" }, { columnName: "city" }]; - - FieldId sensitiveAttributeField = - FieldId.newBuilder() - .setColumnName(sensitiveAttribute) - .build(); + FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build(); List quasiIdFields = quasiIds .stream() - .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .map(columnName -> FieldId.newBuilder().setName(columnName).build()) .collect(Collectors.toList()); LDiversityConfig ldiversityConfig = @@ -341,41 +421,63 @@ private static void calculateLDiversity( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setLDiversityConfig(ldiversityConfig) - .build(); + PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // create /action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - LDiversityHistogramBucket results = - response.getLDiversityResult().getSensitiveValueFrequencyHistogramBuckets(0); - - for (LDiversityEquivalenceClass bucket : results.getBucketValuesList()) { - List quasiIdValues = bucket.getQuasiIdsValuesList() - .stream() - .map(v -> v.toString()) - .collect(Collectors.toList()); - - System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); - System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); - - for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { - System.out.println("\t\tSensitive value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)."); + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setRiskJob(riskAnalysisJobConfig) + .setParent(projectId) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + // wait on job completion + waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + LDiversityResult lDiversityResult = riskDetails.getLDiversityResult(); + for (LDiversityHistogramBucket result : + lDiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) { + for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) { + List quasiIdValues = + bucket + .getQuasiIdsValuesList() + .stream() + .map(Value::toString) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + + for 
(ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { + System.out.println( + "\t\tSensitive value " + + valueFrequency.getValue().toString() + + " occurs " + + valueFrequency.getCount() + + " time(s)."); + } } } } catch (Exception e) { @@ -384,10 +486,9 @@ private static void calculateLDiversity( // [END dlp_l_diversity] } - /** - * Command line application to perform risk analysis using the Data Loss Prevention API. - * Supported data format: BigQuery tables + * Command line application to perform risk analysis using the Data Loss Prevention API. Supported + * data format: BigQuery tables */ public static void main(String[] args) throws Exception { @@ -418,8 +519,14 @@ public static void main(String[] args) throws Exception { Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); commandLineOptions.addOption(projectIdOption); - Option columnNameOption = - Option.builder("columnName").hasArg(true).required(false).build(); + Option topicIdOption = Option.builder("topicId").hasArg(true).required(false).build(); + commandLineOptions.addOption(topicIdOption); + + Option subscriptionIdOption = + Option.builder("subscriptionId").hasArg(true).required(false).build(); + commandLineOptions.addOption(subscriptionIdOption); + + Option columnNameOption = Option.builder("columnName").hasArg(true).required(false).build(); commandLineOptions.addOption(columnNameOption); Option sensitiveAttributeOption = @@ -447,28 +554,38 @@ public static void main(String[] args) throws Exception { String tableId = cmd.getOptionValue(tableIdOption.getOpt()); // use default project id when project id is not specified String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + String topicId = cmd.getOptionValue(topicIdOption.getOpt()); + String subscriptionId = 
cmd.getOptionValue(subscriptionIdOption.getOpt()); if (cmd.hasOption("n")) { // numerical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateNumericalStats(projectId, datasetId, tableId, columnName); + calculateNumericalStats(projectId, datasetId, tableId, columnName, topicId, subscriptionId); } else if (cmd.hasOption("c")) { // categorical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateCategoricalStats(projectId, datasetId, tableId, columnName); + calculateCategoricalStats(projectId, datasetId, tableId, columnName, topicId, subscriptionId); } else if (cmd.hasOption("k")) { // k-anonymity analysis List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - calculateKAnonymity(projectId, datasetId, tableId, quasiIdColumnNames); + calculateKAnonymity( + projectId, datasetId, tableId, quasiIdColumnNames, topicId, subscriptionId); } else if (cmd.hasOption("l")) { // l-diversity analysis String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - calculateLDiversity(projectId, datasetId, tableId, sensitiveAttribute, quasiIdColumnNames); + calculateLDiversity( + projectId, + datasetId, + tableId, + sensitiveAttribute, + quasiIdColumnNames, + topicId, + subscriptionId); } } } diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java new file mode 100644 index 00000000000..fbd33e58c4e --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -0,0 +1,254 @@ +/** + * Copyright 2018, Google, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may obtain a copy of the License + * at + * + *

http://www.apache.org/licenses/LICENSE-2.0 + * + *

Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.example.dlp; + +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.dlp.v2.DlpServiceClient.ListInspectTemplatesPage; +import com.google.cloud.dlp.v2.DlpServiceClient.ListInspectTemplatesPagedResponse; +import com.google.privacy.dlp.v2.CreateInspectTemplateRequest; +import com.google.privacy.dlp.v2.DeleteInspectTemplateRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectConfig.FindingLimits; +import com.google.privacy.dlp.v2.InspectTemplate; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ListInspectTemplatesRequest; +import com.google.privacy.dlp.v2.ListInspectTemplatesResponse; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class Templates { + + /** + * [START dlp_create_template] + * + * @param projectId Google Cloud Project ID to call the API under + * @param templateId (Optional) The name of the template to be created + * @param displayName (Optional) The human-readable name to give the template + * @param infoTypeList The infoTypes of information to match + * @param includeQuote Whether to include the matching string + * @param minLikelihood The minimum likelihood required before 
returning a match + * @param maxFindings The maximum number of findings to report per request (0 = server maximum) + */ + private static void createInspectTemplate( + String projectId, + String templateId, + String displayName, + List infoTypeList, + boolean includeQuote, + Likelihood minLikelihood, + int maxFindings) { + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + + // Construct the inspection configuration for the template + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypeList) + .setMinLikelihood(minLikelihood) + .setIncludeQuote(includeQuote) + .setLimits(findingLimits) + .build(); + + InspectTemplate inspectTemplate = + InspectTemplate.newBuilder() + .setInspectConfig(inspectConfig) + .setDisplayName(displayName) + .build(); + + CreateInspectTemplateRequest createInspectTemplateRequest = + CreateInspectTemplateRequest.newBuilder() + .setParent(projectId) + .setInspectTemplate(inspectTemplate) + .setTemplateId(templateId) + .build(); + + InspectTemplate response = + dlpServiceClient.createInspectTemplate(createInspectTemplateRequest); + System.out.printf("Template created: %s", response.getName()); + } catch (Exception e) { + System.out.printf("Error creating template: %s", e.getMessage()); + } + } + // [END dlp_create_template] + + /** + * [START dlp_list_templates] + * + *

List DLP inspection templates created in a given project + * + * @param projectId Google Cloud Project ID + */ + private static void listInspectTemplates(String projectId) { + + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + ListInspectTemplatesRequest request = + ListInspectTemplatesRequest.newBuilder().setParent(projectId).setPageSize(1).build(); + + ListInspectTemplatesPagedResponse response = dlpServiceClient.listInspectTemplates(request); + ListInspectTemplatesPage page = response.getPage(); + ListInspectTemplatesResponse templatesResponse = page.getResponse(); + + for (InspectTemplate template : templatesResponse.getInspectTemplatesList()) { + System.out.printf("Template name: %s", template.getName()); + if (template.getDisplayName() != null) { + System.out.printf("Template display name: %s", template.getDisplayName()); + System.out.printf("Template create time: %s", template.getCreateTime()); + System.out.printf("Template update time: %s", template.getUpdateTime()); + + // print inspection config + InspectConfig inspectConfig = template.getInspectConfig(); + for (InfoType infoType : inspectConfig.getInfoTypesList()) { + System.out.printf("InfoType: %s\n", infoType.getName()); + } + System.out.printf("Min likelihood: %s\n", inspectConfig.getMinLikelihood()); + System.out.printf("Limits: %s\n", inspectConfig.getLimits().getMaxFindingsPerRequest()); + } + } + } catch (Exception e) { + System.out.printf("Error creating template: %s", e.getMessage()); + } + } + // [END dlp_list_templates] + + /** + * [START dlp_delete_template] + * + * @param projectId Google Cloud Project ID + * @param templateId Template ID to be deleted + */ + private static void deleteInspectTemplate(String projectId, String templateId) { + + // construct the template name to be deleted + String templateName = String.format("projects/%s/inspectTemplates/%s", projectId, templateId); + // instantiate the client + try (DlpServiceClient dlpServiceClient = 
DlpServiceClient.create()) { + // create delete template request + DeleteInspectTemplateRequest request = + DeleteInspectTemplateRequest.newBuilder().setName(templateName).build(); + + dlpServiceClient.deleteInspectTemplate(request); + System.out.printf("Deleted template: %s\n", templateName); + } catch (Exception e) { + System.err.printf("Error deleting template: %s\n", templateName); + } + } + // [END dlp_delete_template] + + /** Command line application to create, list and delete DLP inspect templates. */ + public static void main(String[] args) throws Exception { + + Options commandLineOptions = new Options(); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option createOption = new Option("c", "create", true, "Create inspect template"); + optionsGroup.addOption(createOption); + + Option listOption = new Option("l", "list", true, "List inspect templates"); + optionsGroup.addOption(listOption); + + Option deleteOption = new Option("d", "delete", true, "Delete inspect template"); + optionsGroup.addOption(deleteOption); + + commandLineOptions.addOptionGroup(optionsGroup); + + Option minLikelihoodOption = + Option.builder("minLikelihood").hasArg(true).required(false).build(); + + commandLineOptions.addOption(minLikelihoodOption); + + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + + Option templateIdOption = Option.builder("templateId").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateIdOption); + + Option templateDisplayNameOption = + Option.builder("displayName").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateDisplayNameOption); + + Option includeQuoteOption = Option.builder("includeQuote").hasArg(true).required(false).build(); + 
commandLineOptions.addOption(includeQuoteOption); + + Option maxFindingsOption = Option.builder("maxFindings").hasArg(true).required(false).build(); + commandLineOptions.addOption(maxFindingsOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Redact.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + if (cmd.hasOption(createOption.getOpt())) { + String templateId = cmd.getOptionValue(templateIdOption.getOpt()); + String displayName = cmd.getOptionValue(templateDisplayNameOption.getOpt()); + + Likelihood minLikelihood = + Likelihood.valueOf( + cmd.getOptionValue( + minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); + + List infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + if (infoTypes != null) { + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + Boolean includeQuote = + Boolean.valueOf(cmd.getOptionValue(includeQuoteOption.getOpt(), "false")); + int maxFindings = Integer.valueOf(maxFindingsOption.getOpt(), 0); + createInspectTemplate( + projectId, + templateId, + displayName, + infoTypesList, + includeQuote, + minLikelihood, + maxFindings); + + } else if (cmd.hasOption(listOption.getOpt())) { + listInspectTemplates(projectId); + } else if (cmd.hasOption(deleteOption.getOpt())) { + String templateId = cmd.getOptionValue(templateIdOption.getOpt()); + deleteInspectTemplate(projectId, templateId); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Triggers.java b/dlp/src/main/java/com/example/dlp/Triggers.java new file mode 100644 index 00000000000..6eb36e0041c --- 
/dev/null +++ b/dlp/src/main/java/com/example/dlp/Triggers.java @@ -0,0 +1,282 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.CloudStorageOptions; +import com.google.privacy.dlp.v2.CreateJobTriggerRequest; +import com.google.privacy.dlp.v2.DeleteJobTriggerRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectJobConfig; +import com.google.privacy.dlp.v2.JobTrigger; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ListJobTriggersRequest; +import com.google.privacy.dlp.v2.Schedule; +import com.google.privacy.dlp.v2.StorageConfig; +import com.google.protobuf.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class Triggers { + + /** + * [START dlp_create_trigger] + * + *

Schedule a DLP inspection trigger for a GCS location. + * + * @param triggerId (Optional) name of the trigger to be created + * @param displayName (Optional) display name for the trigger to be created + * @param description (Optional) description for the trigger to be created + * @param gcsUrl URL path to GCS bucket, eg. gs://my-bucket-name + * @param scanPeriod How often to wait between scans, in days (minimum = 1 day) + * @param infoTypes infoTypes of information to match eg. InfoType.PHONE_NUMBER, + * InfoType.EMAIL_ADDRESS + * @param minLikelihood minimum likelihood required before returning a match + * @param maxFindings maximum number of findings to report per request (0 = server maximum) + * @param projectId The project ID to run the API call under + */ + private static void createTrigger( + String triggerId, + String displayName, + String description, + String gcsUrl, + int scanPeriod, + List infoTypes, + Likelihood minLikelihood, + int maxFindings, + String projectId) { + + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + CloudStorageOptions.FileSet fileSet = + CloudStorageOptions.FileSet.newBuilder().setUrl(gcsUrl).build(); + CloudStorageOptions cloudStorageOptions = + CloudStorageOptions.newBuilder().setFileSet(fileSet).build(); + + StorageConfig storageConfig = + StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); + + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setInspectConfig(inspectConfig) + .setStorageConfig(storageConfig) + .build(); + + // Schedule scan of GCS bucket every scanPeriod number of days + Duration duration = 
Duration.newBuilder().setSeconds(scanPeriod * 24 * 3600).build(); + Schedule schedule = Schedule.newBuilder().setRecurrencePeriodDuration(duration).build(); + JobTrigger.Trigger trigger = JobTrigger.Trigger.newBuilder().setSchedule(schedule).build(); + JobTrigger jobTrigger = + JobTrigger.newBuilder() + .setInspectJob(inspectJobConfig) + .setName(triggerId) + .setDisplayName(displayName) + .setDescription(description) + .setStatus(JobTrigger.Status.HEALTHY) + .addTriggers(trigger) + .build(); + + // Create scan request + CreateJobTriggerRequest createJobTriggerRequest = + CreateJobTriggerRequest.newBuilder() + .setParent(projectId) + .setJobTrigger(jobTrigger) + .build(); + + JobTrigger createdJobTrigger = dlpServiceClient.createJobTrigger(createJobTriggerRequest); + + System.out.println("Created Trigger: " + createdJobTrigger.getDisplayName()); + } catch (Exception e) { + System.out.println("Error creating trigger :" + e.getMessage()); + } + } + // [END dlp_create_trigger] + + /** + * [START dlp_list_triggers] List all DLP triggers for a given project. + * + * @param projectId The project ID to run the API call under. 
+ */ + private static void listTriggers(String projectId) { + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ListJobTriggersRequest listJobTriggersRequest = + ListJobTriggersRequest.newBuilder().setParent(projectId).build(); + DlpServiceClient.ListJobTriggersPagedResponse response = + dlpServiceClient.listJobTriggers(listJobTriggersRequest); + response + .getPage() + .getValues() + .forEach( + trigger -> { + System.out.println("Trigger: " + trigger.getName()); + System.out.println("Created: " + trigger.getCreateTime()); + System.out.println("Updated: " + trigger.getUpdateTime()); + if (trigger.getDisplayName() != null) { + System.out.println("Display name: " + trigger.getDisplayName()); + } + if (trigger.getDescription() != null) { + System.out.println("Description: " + trigger.getDescription()); + } + System.out.println("Status: " + trigger.getStatus()); + System.out.println("Error count: " + trigger.getErrorsCount()); + }); + } catch (Exception e) { + System.out.println("Error listing triggers :" + e.getMessage()); + } + } + // [END dlp_list_trigger] + + /** + * [START dlp_delete_trigger] + * + *

Delete a DLP trigger in a project. + * + * @param projectId The project ID to run the API call under. + * @param triggerId Trigger ID + */ + private static void deleteTrigger(String projectId, String triggerId) { + // Instantiates a client + // triggerName to provided as projects/project-id/jobTriggers/triggerId + String triggerName = String.format("projects/%s/jobTriggers/%s", projectId, triggerId); + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + DeleteJobTriggerRequest deleteJobTriggerRequest = + DeleteJobTriggerRequest.newBuilder().setName(triggerName).build(); + dlpServiceClient.deleteJobTrigger(deleteJobTriggerRequest); + } catch (Exception e) { + System.out.println("Error deleting trigger :" + e.getMessage()); + } + } + + // [END dlp_delete_trigger] + + /** Command line application to crate, list and delete triggers. */ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option createTriggerOption = + new Option("c", "create", true, "Create trigger to scan a GCS bucket"); + optionsGroup.addOption(createTriggerOption); + + Option listTriggersOption = new Option("l", "list", true, "List triggers"); + optionsGroup.addOption(listTriggersOption); + + Option deleteTriggerOption = new Option("d", "delete", true, "Delete trigger"); + optionsGroup.addOption(deleteTriggerOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option gcsUrlOption = Option.builder("gcsUrl").hasArg(true).required(false).build(); + commandLineOptions.addOption(gcsUrlOption); + + Option minLikelihoodOption = + Option.builder("minLikelihood").hasArg(true).required(false).build(); + + commandLineOptions.addOption(minLikelihoodOption); + + Option maxFindingsOption = Option.builder("maxFindings").hasArg(true).required(false).build(); + + commandLineOptions.addOption(maxFindingsOption); + + Option infoTypesOption = 
Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option triggerIdOption = Option.builder("triggerId").hasArg(true).required(false).build(); + Option displayNameOption = Option.builder("displayName").hasArg(true).required(false).build(); + Option descriptionOption = Option.builder("description").hasArg(true).required(false).build(); + + Option scanPeriodOption = Option.builder("scanPeriod").hasArg(true).required(false).build(); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(DeIdentification.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = cmd.getOptionValue(projectIdOption.getOpt()); + if (cmd.hasOption("c")) { + Likelihood minLikelihood = + Likelihood.valueOf( + cmd.getOptionValue( + minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); + int maxFindings = Integer.parseInt(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); + String triggerId = cmd.getOptionValue(triggerIdOption.getOpt()); + String displayName = cmd.getOptionValue(displayNameOption.getOpt(), ""); + String description = cmd.getOptionValue(descriptionOption.getOpt(), ""); + String gcsUrl = cmd.getOptionValue(gcsUrlOption.getOpt()); + int scanPeriod = Integer.valueOf(cmd.getOptionValue(scanPeriodOption.getOpt())); + List infoTypesList = new ArrayList<>(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : infoTypes) { + 
infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + createTrigger( + triggerId, + displayName, + description, + gcsUrl, + scanPeriod, + infoTypesList, + minLikelihood, + maxFindings, + projectId); + } else if (cmd.hasOption("l")) { + // list triggers + listTriggers(projectId); + } else if (cmd.hasOption("d")) { + String triggerId = cmd.getOptionValue(triggerIdOption.getOpt()); + deleteTrigger(projectId, triggerId); + } + } +} diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index d31708c568a..b5f427282a9 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -31,9 +31,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class DeIdentificationIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -56,11 +57,12 @@ public void setUp() { @Test public void testDeidStringMasksCharacters() throws Exception { String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] { - "-m", text, - "-maskingCharacter", "x", - "-numberToMask", "5" - }); + DeIdentification.main( + new String[] { + "-m", text, + "-maskingCharacter", "x", + "-numberToMask", "5" + }); String output = bout.toString(); assertEquals(output, "My SSN is xxxxx9127\n"); } @@ -68,11 +70,12 @@ public void testDeidStringMasksCharacters() throws Exception { @Test public void testDeidStringPerformsFpe() throws Exception { String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] { - "-f", text, - "-wrappedKey", wrappedKey, - "-keyName", keyName - }); + DeIdentification.main( + new String[] { + "-f", text, + "-wrappedKey", wrappedKey, + "-keyName", keyName + }); String output = 
bout.toString(); assertFalse(output.contains(text)); assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 5f57d194a8a..7cd8b85ea11 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -30,9 +30,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class InspectIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -103,11 +104,8 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { @Test public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] { - "-bq", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful" - }); + Inspect.main( + new String[] {"-bq", "-datasetId", "integration_tests_dlp", "-tableId", "harmful"}); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); } diff --git a/dlp/src/test/java/com/example/dlp/MetadataIT.java b/dlp/src/test/java/com/example/dlp/MetadataIT.java index 25ba2b68f5d..eeacbf35e9a 100644 --- a/dlp/src/test/java/com/example/dlp/MetadataIT.java +++ b/dlp/src/test/java/com/example/dlp/MetadataIT.java @@ -28,9 +28,9 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class MetadataIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; diff --git a/dlp/src/test/java/com/example/dlp/QuickStartIT.java b/dlp/src/test/java/com/example/dlp/QuickStartIT.java index 1fa9d7b36d6..2e6c16f5802 100644 --- 
a/dlp/src/test/java/com/example/dlp/QuickStartIT.java +++ b/dlp/src/test/java/com/example/dlp/QuickStartIT.java @@ -28,9 +28,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class QuickStartIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java index 798c11de258..5c68b9ebd1c 100644 --- a/dlp/src/test/java/com/example/dlp/RedactIT.java +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -31,9 +31,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class RedactIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 58a1bbb22a2..a6eb0491cb0 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -29,9 +29,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class RiskAnalysisIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -47,40 +48,46 @@ public void setUp() { @Test public void testNumericalStats() throws Exception { - RiskAnalysis.main(new String[] { - "-n", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-columnName", "Age" - }); + RiskAnalysis.main( + new String[] { + "-n", "-datasetId", 
"integration_tests_dlp", "-tableId", "harmful", "-columnName", "Age" + }); String output = bout.toString(); - assertTrue(Pattern.compile( - "Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); - assertTrue(Pattern.compile( - "Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); + assertTrue( + Pattern.compile("Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); + assertTrue( + Pattern.compile("Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); } @Test public void testCategoricalStats() throws Exception { - RiskAnalysis.main(new String[] { - "-c", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-columnName", "Mystery" - }); + RiskAnalysis.main( + new String[] { + "-c", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-columnName", + "Mystery" + }); String output = bout.toString(); - assertTrue(Pattern.compile( - "Most common value occurs \\d time\\(s\\)").matcher(output).find()); + assertTrue(Pattern.compile("Most common value occurs \\d time\\(s\\)").matcher(output).find()); } @Test public void testKAnonymity() throws Exception { - RiskAnalysis.main(new String[] { - "-k", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-quasiIdColumnNames", "Age", "Mystery" - }); + RiskAnalysis.main( + new String[] { + "-k", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-quasiIdColumnNames", + "Age", + "Mystery" + }); String output = bout.toString(); assertTrue(Pattern.compile("Bucket size range: \\[\\d, \\d\\]").matcher(output).find()); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); @@ -89,13 +96,19 @@ public void testKAnonymity() throws Exception { @Test public void testLDiversity() throws Exception { - RiskAnalysis.main(new String[] { - "-l", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-sensitiveAttribute", "Name", - "-quasiIdColumnNames", "Age", "Mystery" - 
}); + RiskAnalysis.main( + new String[] { + "-l", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-sensitiveAttribute", + "Name", + "-quasiIdColumnNames", + "Age", + "Mystery" + }); String output = bout.toString(); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); assertTrue(output.contains("Class size: 1")); From 284902e105ace15d97f1833fcfe63b213cdc0931 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Tue, 13 Mar 2018 09:43:20 -0700 Subject: [PATCH 02/23] Update to most recent versioning. --- dlp/pom.xml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dlp/pom.xml b/dlp/pom.xml index d749d86159e..d345c5203ea 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -41,16 +41,14 @@ - com.google.cloud google-cloud-dlp - 0.35.1-beta-SNAPSHOT + 0.38.1-beta-SNAPSHOT - com.google.cloud google-cloud-pubsub - 0.35.1-beta-SNAPSHOT + 0.38.1-beta-SNAPSHOT commons-cli From 184c006adee31afb39b5d34d7d14b4509cf17ca3 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Tue, 13 Mar 2018 13:43:37 -0700 Subject: [PATCH 03/23] Updated DeIdentification samples and tests. --- .../com/example/dlp/DeIdentification.java | 65 +++++++++---------- .../com/example/dlp/DeIdentificationIT.java | 31 +++++++-- dlp/src/test/resources/dates.csv | 0 3 files changed, 56 insertions(+), 40 deletions(-) create mode 100644 dlp/src/test/resources/dates.csv diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index cbd0c2c5f1e..16dd8059c5d 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -77,9 +77,7 @@ public class DeIdentification { * @param projectId ID of Google Cloud project to run the API under. 
*/ private static void deIdentifyWithMask( - String string, - Character maskingCharacter, - int numberToMask) { + String string, Character maskingCharacter, int numberToMask, String projectId) { // [START dlp_deidentify_masking] /** * Deidentify a string by masking sensitive information with a character using the DLP API. @@ -95,7 +93,6 @@ private static void deIdentifyWithMask( // string = "My SSN is 372819127"; // numberToMask = 5; // maskingCharacter = 'x'; - ByteContentItem byteContentItem = ByteContentItem.newBuilder() .setType(ByteContentItem.BytesType.TEXT_UTF8) @@ -112,7 +109,9 @@ private static void deIdentifyWithMask( // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = - PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build(); + PrimitiveTransformation.newBuilder() + .setCharacterMaskConfig(characterMaskConfig) + .build(); InfoTypeTransformation infoTypeTransformationObject = InfoTypeTransformation.newBuilder() @@ -124,15 +123,15 @@ private static void deIdentifyWithMask( .addTransformations(infoTypeTransformationObject) .build(); - // Create the deidentification request object DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() .setInfoTypeTransformations(infoTypeTransformationArray) .build(); + // Create the deidentification request object DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(projectId) + .setParent(String.format("projects/%s", projectId)) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -142,10 +141,11 @@ private static void deIdentifyWithMask( // Print the character-masked input value // e.g. 
"My SSN is 123456789" --> "My SSN is *********" - ContentItem item = response.getItem(); - System.out.println(item.getValue()); + String result = response.getItem().getByteItem().getData().toStringUtf8(); + System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithMask: " + e.getMessage()); + System.out.println(e.getStackTrace()); } } // [END dlp_deidentify_mask] @@ -223,7 +223,7 @@ private static void deIdentifyWithFpe( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(projectId) + .setParent(String.format("projects/%s", projectId)) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -233,8 +233,8 @@ private static void deIdentifyWithFpe( // Print the deidentified input value // e.g. "My SSN is 123456789" --> "My SSN is 7261298621" - ContentItem item = response.getItem(); - System.out.println(item.getValue()); + String result = response.getItem().getByteItem().getData().toStringUtf8(); + System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); } @@ -292,7 +292,7 @@ private static void deidentifyWithDateShift( KmsWrappedCryptoKey kmsWrappedCryptoKey = KmsWrappedCryptoKey.newBuilder() .setCryptoKeyName(keyName) - .setWrappedKey(ByteString.copyFromUtf8(wrappedKey)) + .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) .build(); dateShiftConfigBuilder.setCryptoKey( CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build()); @@ -352,7 +352,7 @@ private static void deidentifyWithDateShift( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(projectId) + .setParent(String.format("projects/%s", projectId)) .setDeidentifyConfig(deidentifyConfig) .setItem(tableItem) .build(); @@ -369,7 +369,7 @@ private static void deidentifyWithDateShift( File outputFile = outputCsvPath.toFile(); if (!outputFile.exists()) { - outputFile.mkdirs(); + 
outputFile.getParentFile().mkdirs(); outputFile.createNewFile(); } BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputFile)); @@ -379,27 +379,19 @@ private static void deidentifyWithDateShift( // write out each row for (Table.Row outputRow : outputRows) { - String row = - outputRow - .getValuesList() - .stream() - .map( - value -> - (value.getDateValue() != null) - ? (String.valueOf(value.getDateValue().getMonth()) - + "/" - + String.valueOf(value.getDateValue().getDay()) - + "/" - + String.valueOf(value.getDateValue().getYear())) - : value.getStringValue()) - .collect(Collectors.joining(",")); + String row = outputRow.getValuesList() + .stream() + .map(value -> value.getStringValue()) + .collect(Collectors.joining(",")); bufferedWriter.append(row + "\n"); } bufferedWriter.flush(); bufferedWriter.close(); - System.out.println("Successfully saved date-shift output to:" + outputCsvPath.getFileName()); + System.out.println("Successfully saved date-shift output to: " + outputCsvPath.getFileName()); + } catch (Exception e) { + System.out.println("Error in deidentifyWithDateShift: " + e.getMessage()); } } @@ -446,15 +438,16 @@ public static void main(String[] args) throws Exception { optionsGroup.setRequired(true); Option deidentifyMaskingOption = - new Option("m", "mask", true, "Deidentify with character masking"); + new Option("m", "mask", true, "Deidentify with character masking."); optionsGroup.addOption(deidentifyMaskingOption); - Option deidentifyFpeOption = new Option("f", "fpe", true, "Deidentify with FFX FPE"); + Option deidentifyFpeOption = + new Option("f", "fpe", true, "Deidentify with FFX FPE."); optionsGroup.addOption(deidentifyFpeOption); Option deidentifyDateShiftOption = - new Option( - "d", "date", true, "Deidentify dates in a CSV file by pseudorandomly shifting them."); + new Option("d", "date", false, "Deidentify dates in a CSV file."); + optionsGroup.addOption(deidentifyDateShiftOption); Options commandLineOptions = new 
Options(); commandLineOptions.addOptionGroup(optionsGroup); @@ -533,8 +526,8 @@ public static void main(String[] args) throws Exception { cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId); - } // deidentify with date shift - else if (cmd.hasOption("d")) { + } else if (cmd.hasOption("d")) { + //deidentify with date shift String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt()); String outputCsv = cmd.getOptionValue(outputCsvPathOption.getOpt()); diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index b5f427282a9..d53170453ab 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -16,7 +16,8 @@ package com.example.dlp; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -25,6 +26,7 @@ import java.io.PrintStream; import java.util.regex.Pattern; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -64,7 +66,7 @@ public void testDeidStringMasksCharacters() throws Exception { "-numberToMask", "5" }); String output = bout.toString(); - assertEquals(output, "My SSN is xxxxx9127\n"); + assertThat(output, containsString("My SSN is xxxxx9127")); } @Test @@ -77,8 +79,29 @@ public void testDeidStringPerformsFpe() throws Exception { "-keyName", keyName }); String output = bout.toString(); - assertFalse(output.contains(text)); - assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); + assertFalse( + "Response contains original SSN.", + output.contains("372819127")); + 
assertThat(output, containsString("My SSN is ")); + } + + @Test + public void testDeidentifyWithDateShift() throws Exception { + DeIdentification.main( + new String[] { + "-d", + "-inputCsvPath", "src/test/resources/dates.csv", + "-outputCsvPath", "src/test/resources/results.temp.csv", + "-dateFields", "birth_date,register_date", + "-lowerBoundDays", "5", + "-upperBoundDays", "5", + "-contextField", "name", + "-wrappedKey", wrappedKey, + "-keyName", keyName + }); + String output = bout.toString(); + assertThat( + output, containsString("Successfully saved date-shift output to: results.temp.csv")); } @After diff --git a/dlp/src/test/resources/dates.csv b/dlp/src/test/resources/dates.csv new file mode 100644 index 00000000000..e69de29bb2d From 1592ef4d1e36998380818092f16221c6ec0f3742 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Tue, 13 Mar 2018 14:31:35 -0700 Subject: [PATCH 04/23] Revert pubsub to public version. --- dlp/pom.xml | 2 +- dlp/src/main/java/com/example/dlp/DeIdentification.java | 7 ++++--- dlp/src/test/java/com/example/dlp/DeIdentificationIT.java | 5 +---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/dlp/pom.xml b/dlp/pom.xml index d345c5203ea..0e9f1aee795 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -48,7 +48,7 @@ com.google.cloud google-cloud-pubsub - 0.38.1-beta-SNAPSHOT + 0.38.0-beta commons-cli diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 16dd8059c5d..159d66ac326 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -35,6 +35,7 @@ import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation; import com.google.privacy.dlp.v2.KmsWrappedCryptoKey; import com.google.privacy.dlp.v2.PrimitiveTransformation; +import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.RecordTransformations; import 
com.google.privacy.dlp.v2.Table; import com.google.privacy.dlp.v2.Value; @@ -131,7 +132,7 @@ private static void deIdentifyWithMask( // Create the deidentification request object DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(String.format("projects/%s", projectId)) + .setParent(ProjectName.of(projectId).toString()) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -223,7 +224,7 @@ private static void deIdentifyWithFpe( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(String.format("projects/%s", projectId)) + .setParent(ProjectName.of(projectId).toString()) .setDeidentifyConfig(deidentifyConfig) .setItem(contentItem) .build(); @@ -352,7 +353,7 @@ private static void deidentifyWithDateShift( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() - .setParent(String.format("projects/%s", projectId)) + .setParent(ProjectName.of(projectId).toString()) .setDeidentifyConfig(deidentifyConfig) .setItem(tableItem) .build(); diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index d53170453ab..e62b113e464 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -16,17 +16,14 @@ package com.example.dlp; -import static org.junit.Assert.assertThat; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertThat; import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import java.util.regex.Pattern; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; From e38238bd39893a7fd8615ab9bc6e7328a079e484 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: 
Wed, 14 Mar 2018 12:45:14 -0700 Subject: [PATCH 05/23] Fix Inspect samples/tests (minus pubsub). --- .../main/java/com/example/dlp/Inspect.java | 38 ++++++++++---- .../main/java/com/example/dlp/Metadata.java | 31 +++++------ .../java/com/example/dlp/RiskAnalysis.java | 5 +- .../test/java/com/example/dlp/InspectIT.java | 52 ++++++++++++++----- dlp/src/test/resources/dates.csv | 5 ++ 5 files changed, 90 insertions(+), 41 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index b9cabcbf23c..0ea44d63412 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -20,6 +20,8 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.cloud.pubsub.v1.Subscriber; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.pubsub.v1.ProjectSubscriptionName; import com.google.privacy.dlp.v2.Action; import com.google.privacy.dlp.v2.BigQueryOptions; import com.google.privacy.dlp.v2.BigQueryTable; @@ -44,8 +46,11 @@ import com.google.privacy.dlp.v2.PartitionId; import com.google.privacy.dlp.v2.StorageConfig; import com.google.protobuf.ByteString; -import com.google.pubsub.v1.ProjectSubscriptionName; +import com.google.pubsub.v1.ProjectTopicName; +import com.google.pubsub.v1.TopicName; import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -76,7 +81,8 @@ private static void inspectString( Likelihood minLikelihood, int maxFindings, List infoTypes, - boolean includeQuote) { + boolean includeQuote, + String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { InspectConfig.FindingLimits findingLimits = @@ -101,6 +107,7 @@ private static void inspectString( InspectContentRequest request = InspectContentRequest.newBuilder() + 
.setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) .setItem(contentItem) .build(); @@ -138,7 +145,8 @@ private static void inspectFile( Likelihood minLikelihood, int maxFindings, List infoTypes, - boolean includeQuote) { + boolean includeQuote, + String projectId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { // detect file mime type, default to application/octet-stream @@ -146,6 +154,7 @@ private static void inspectFile( if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } + ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.TEXT_UTF8; switch (mimeType) { @@ -163,7 +172,11 @@ private static void inspectFile( break; } - ByteContentItem byteContentItem = ByteContentItem.newBuilder().setType(bytesType).build(); + byte[] data = Files.readAllBytes(Paths.get(filePath)); + ByteContentItem byteContentItem = ByteContentItem.newBuilder() + .setType(bytesType) + .setData(ByteString.copyFrom(data)) + .build(); ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); InspectConfig.FindingLimits findingLimits = @@ -179,6 +192,7 @@ private static void inspectFile( InspectContentRequest request = InspectContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) .setItem(contentItem) .build(); @@ -270,7 +284,7 @@ private static void inspectGcsFile( // asynchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .setInspectJob(inspectJobConfig) .build(); @@ -397,7 +411,7 @@ private static void inspectDatastore( // asynchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() - .setParent(projectId) + 
.setParent(ProjectName.of(projectId).toString()) .setInspectJob(inspectJobConfig) .build(); @@ -476,9 +490,11 @@ private static void inspectBigquery( .setLimits(findingLimits) .build(); - String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + ProjectTopicName topic = ProjectTopicName.of(projectId, topicId); Action.PublishToPubSub publishToPubSub = - Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); + Action.PublishToPubSub.newBuilder() + .setTopic(topic.toString()) + .build(); Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); @@ -492,7 +508,7 @@ private static void inspectBigquery( // asynchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .setInspectJob(inspectJobConfig) .build(); @@ -631,10 +647,10 @@ public static void main(String[] args) throws Exception { // string inspection if (cmd.hasOption("s")) { String val = cmd.getOptionValue(stringOption.getOpt()); - inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote); + inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); } else if (cmd.hasOption("f")) { String filePath = cmd.getOptionValue(fileOption.getOpt()); - inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote); + inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); // gcs file inspection } else if (cmd.hasOption("gcs")) { String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java b/dlp/src/main/java/com/example/dlp/Metadata.java index 49247eb5843..8d219139d3c 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -50,21 +50,22 @@ private static void listInfoTypes(String filter, String 
languageCode) throws Exc // [END dlp_list_info_types] } - private static void listRootCategories(String languageCode) throws Exception { - // [START dlp_list_categories] - // Instantiate a DLP client - try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The BCP-47 language code to use, e.g. 'en-US' - // languageCode = 'en-US' - ListRootCategoriesResponse rootCategoriesResponse = - dlpClient.listRootCategories(languageCode); - for (CategoryDescription categoryDescription : rootCategoriesResponse.getCategoriesList()) { - System.out.println("Name : " + categoryDescription.getName()); - System.out.println("Display name : " + categoryDescription.getDisplayName()); - } - } - // [END dlp_list_categories] - } + // TODO + // private static void listRootCategories(String languageCode) throws Exception { + // // [START dlp_list_categories] + // // Instantiate a DLP client + // try (DlpServiceClient dlpClient = DlpServiceClient.create()) { + // // The BCP-47 language code to use, e.g. 'en-US' + // // languageCode = 'en-US' + // ListRootCategoriesResponse rootCategoriesResponse = + // dlpClient.listRootCategories(languageCode); + // for (CategoryDescription categoryDescription : rootCategoriesResponse.getCategoriesList()) { + // System.out.println("Name : " + categoryDescription.getName()); + // System.out.println("Display name : " + categoryDescription.getDisplayName()); + // } + // } + // // [END dlp_list_categories] + // } /** Retrieve infoTypes. 
*/ public static void main(String[] args) throws Exception { diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index fbd78ad5946..8e4a0c57f46 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -43,7 +43,6 @@ import com.google.privacy.dlp.v2.RiskAnalysisJobConfig; import com.google.privacy.dlp.v2.Value; import com.google.privacy.dlp.v2.ValueFrequency; -import com.google.pubsub.v1.ProjectSubscriptionName; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; @@ -151,7 +150,7 @@ private static void waitOnJobCompletion( // wait for job completion final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status +/* TODO // setup a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.newBuilder() @@ -166,7 +165,7 @@ private static void waitOnJobCompletion( done.set(true); } }) - .build(); + .build();*/ // wait for job completion done.get(); diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 7cd8b85ea11..5111bf11b09 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -39,6 +39,8 @@ public class InspectIT { // Update to Google Cloud Storage path containing test.txt private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; // Update to Google Cloud Datastore Kind containing an entity // with phone number and email address properties. 
@@ -56,7 +58,10 @@ public void setUp() { public void testStringInspectionReturnsInfoTypes() throws Exception { String text = "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; - Inspect.main(new String[] {"-s", text}); + Inspect.main(new String[] { + "-s", text, + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); assertTrue(output.contains("EMAIL_ADDRESS")); @@ -64,20 +69,21 @@ public void testStringInspectionReturnsInfoTypes() throws Exception { @Test public void testTextFileInspectionReturnsInfoTypes() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - File file = new File(classLoader.getResource("test.txt").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + Inspect.main(new String[]{ + "-f", "src/test/resources/test.txt", + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); assertTrue(output.contains("EMAIL_ADDRESS")); } - @Ignore // TODO: b/69461298 @Test public void testImageFileInspectionReturnsInfoTypes() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - File file = new File(classLoader.getResource("test.png").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + Inspect.main(new String[]{ + "-f", "src/test/resources/test.png", + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); assertTrue(output.contains("EMAIL_ADDRESS")); @@ -85,8 +91,16 @@ public void testImageFileInspectionReturnsInfoTypes() throws Exception { // Requires that bucket by the specified name exists @Test + @Ignore // TODO: Fix Pubsub public void testGcsFileInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-gcs", "-bucketName", bucketName, "-fileName", "test.txt"}); + Inspect.main(new String[] { + 
"-gcs", + "-bucketName", bucketName, + "-topicId", topicId, + "-subscriptionId", subscriptionId, + "-fileName", "test.txt", + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); assertTrue(output.contains("EMAIL_ADDRESS")); @@ -95,17 +109,31 @@ public void testGcsFileInspectionReturnsInfoTypes() throws Exception { // Requires a Datastore kind containing an entity // with phone number and email address properties. @Test + @Ignore // TODO: Fix Pubsub public void testDatastoreInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-ds", "-kind", datastoreKind}); + Inspect.main(new String[] { + "-ds", + "-kind", datastoreKind, + "-topicId", topicId, + "-subscriptionId", subscriptionId, + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); assertTrue(output.contains("EMAIL_ADDRESS")); } @Test + @Ignore // TODO: Fix Pubsub public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main( - new String[] {"-bq", "-datasetId", "integration_tests_dlp", "-tableId", "harmful"}); + Inspect.main(new String[] { + "-bq", + "-datasetId", "integration_tests_dlp", + "-topicId", topicId, + "-subscriptionId", subscriptionId, + "-tableId", "harmful", + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertTrue(output.contains("PHONE_NUMBER")); } diff --git a/dlp/src/test/resources/dates.csv b/dlp/src/test/resources/dates.csv index e69de29bb2d..676c2b4567f 100644 --- a/dlp/src/test/resources/dates.csv +++ b/dlp/src/test/resources/dates.csv @@ -0,0 +1,5 @@ +name,birth_date,credit_card,register_date +Ann,01/01/1970,4532908762519852,07/21/1996 +James,03/06/1988,4301261899725540,04/09/2001 +Dan,08/14/1945,4620761856015295,11/15/2011 +Laura,11/03/1992,4564981067258901,01/04/2017 \ No newline at end of file From d99f68046ab16dc6547898dc22642da10e1ba971 Mon Sep 
17 00:00:00 2001 From: Kurtis Van Gent Date: Wed, 14 Mar 2018 14:57:53 -0700 Subject: [PATCH 06/23] Updated Jobs and add tests. --- dlp/src/main/java/com/example/dlp/Jobs.java | 23 +++++++++++-------- dlp/src/test/java/com/example/dlp/JobsIT.java | 5 ++++ 2 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 dlp/src/test/java/com/example/dlp/JobsIT.java diff --git a/dlp/src/main/java/com/example/dlp/Jobs.java b/dlp/src/main/java/com/example/dlp/Jobs.java index f2b4b2c106b..7d6a499870d 100644 --- a/dlp/src/main/java/com/example/dlp/Jobs.java +++ b/dlp/src/main/java/com/example/dlp/Jobs.java @@ -19,8 +19,10 @@ import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.privacy.dlp.v2.DeleteDlpJobRequest; import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.DlpJobName; import com.google.privacy.dlp.v2.DlpJobType; import com.google.privacy.dlp.v2.ListDlpJobsRequest; +import com.google.privacy.dlp.v2.ProjectName; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -32,10 +34,10 @@ public class Jobs { + // [START dlp_list_jobs] private static void listJobs(String projectId, String filter, DlpJobType jobType) throws Exception { /** - * [START dlp_list_jobs] * * List DLP jobs * @@ -47,54 +49,55 @@ private static void listJobs(String projectId, String filter, DlpJobType jobType try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ListDlpJobsRequest listDlpJobsRequest = ListDlpJobsRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .setFilter(filter) .setType(jobType) .build(); DlpServiceClient.ListDlpJobsPagedResponse response = dlpServiceClient.listDlpJobs(listDlpJobsRequest); for (DlpJob dlpJob : response.getPage().getValues()) { - System.out.println("Job name: " + dlpJob.getState()); - System.out.println("Job state: " + dlpJob.getState()); + System.out.println(dlpJob.getName() + " 
-- " + dlpJob.getState()); } } } // [END dlp_list_jobs] /** - * [START dlp_delete_job] * * Delete a DLP Job * * @param projectId Google Cloud ProjectID * @param jobId DLP Job ID */ + // [START dlp_delete_job] private static void deleteJob(String projectId, String jobId) { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { // construct complete job name - String jobName = String.format("projects/%s/dlpJobs/%s", projectId, jobId); + DlpJobName job = DlpJobName.of(projectId, jobId); DeleteDlpJobRequest deleteDlpJobRequest = - DeleteDlpJobRequest.newBuilder().setName(jobName).build(); + DeleteDlpJobRequest.newBuilder().setName(job.toString()).build(); // submit job deletion request dlpServiceClient.deleteDlpJob(deleteDlpJobRequest); + + System.out.println("Job deleted successfully."); } catch (Exception e) { System.err.println("Error deleting DLP job: " + e.getMessage()); } - // [END dlp_delete_job] } + // [END dlp_delete_job] /** Command line application to list and delete DLP jobs the Data Loss Prevention API. 
*/ public static void main(String[] args) throws Exception { OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); - Option listOption = new Option("l", "list", true, "List DLP Jobs"); + Option listOption = new Option("l", "list", false, "List DLP Jobs"); optionsGroup.addOption(listOption); - Option deleteOption = new Option("d", "delete", true, "Delete DLP Jobs"); + Option deleteOption = new Option("d", "delete", false, "Delete DLP Jobs"); optionsGroup.addOption(deleteOption); Options commandLineOptions = new Options(); diff --git a/dlp/src/test/java/com/example/dlp/JobsIT.java b/dlp/src/test/java/com/example/dlp/JobsIT.java new file mode 100644 index 00000000000..31fca6758b3 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/JobsIT.java @@ -0,0 +1,5 @@ +package com.example.dlp; + +public class JobsIT { + +} From fb7261f945c6f7fef75180cb217a5ec7617ff503 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Wed, 14 Mar 2018 15:23:00 -0700 Subject: [PATCH 07/23] Updated Metadata classes. --- .../main/java/com/example/dlp/Metadata.java | 27 +++++-------------- .../test/java/com/example/dlp/MetadataIT.java | 21 +++++++-------- 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java b/dlp/src/main/java/com/example/dlp/Metadata.java index 8d219139d3c..154aeae6ba1 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -38,8 +38,10 @@ private static void listInfoTypes(String filter, String languageCode) throws Exc // The category of info types to list, e.g. category = 'GOVERNMENT'; // Optional BCP-47 language code for localized info type friendly names, e.g. 
'en-US' // filter supported_by=INSPECT - ListInfoTypesRequest listInfoTypesRequest = - ListInfoTypesRequest.newBuilder().setFilter(filter).setLanguageCode(languageCode).build(); + ListInfoTypesRequest listInfoTypesRequest = ListInfoTypesRequest.newBuilder() + .setFilter(filter) + .setLanguageCode(languageCode) + .build(); ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(listInfoTypesRequest); List infoTypeDescriptions = infoTypesResponse.getInfoTypesList(); for (InfoTypeDescription infoTypeDescription : infoTypeDescriptions) { @@ -50,27 +52,10 @@ private static void listInfoTypes(String filter, String languageCode) throws Exc // [END dlp_list_info_types] } - // TODO - // private static void listRootCategories(String languageCode) throws Exception { - // // [START dlp_list_categories] - // // Instantiate a DLP client - // try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // // The BCP-47 language code to use, e.g. 'en-US' - // // languageCode = 'en-US' - // ListRootCategoriesResponse rootCategoriesResponse = - // dlpClient.listRootCategories(languageCode); - // for (CategoryDescription categoryDescription : rootCategoriesResponse.getCategoriesList()) { - // System.out.println("Name : " + categoryDescription.getName()); - // System.out.println("Display name : " + categoryDescription.getDisplayName()); - // } - // } - // // [END dlp_list_categories] - // } - /** Retrieve infoTypes. 
*/ public static void main(String[] args) throws Exception { Options options = new Options(); - Option languageCodeOption = Option.builder("language").hasArg(true).required(true).build(); + Option languageCodeOption = Option.builder("language").hasArg(true).required(false).build(); options.addOption(languageCodeOption); Option filterOption = Option.builder("filter").hasArg(true).required(false).build(); @@ -90,6 +75,6 @@ public static void main(String[] args) throws Exception { String languageCode = cmd.getOptionValue(languageCodeOption.getOpt(), "en-US"); String filter = cmd.getOptionValue(filterOption.getOpt(), ""); - listInfoTypes(languageCode, filter); + listInfoTypes(filter, languageCode); } } diff --git a/dlp/src/test/java/com/example/dlp/MetadataIT.java b/dlp/src/test/java/com/example/dlp/MetadataIT.java index eeacbf35e9a..fec3bfa8715 100644 --- a/dlp/src/test/java/com/example/dlp/MetadataIT.java +++ b/dlp/src/test/java/com/example/dlp/MetadataIT.java @@ -43,23 +43,20 @@ public void setUp() { assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } - @Test - public void testRootCategoriesAreRetrieved() throws Exception { - Metadata.main(new String[] {}); - String output = bout.toString(); - assertTrue(output.contains("GOVERNMENT")); - assertTrue(output.contains("HEALTH")); + @After + public void tearDown() { + System.setOut(null); + bout.reset(); } @Test - public void testInfoTypesAreRetrieved() throws Exception { - Metadata.main(new String[] {"-category", "GOVERNMENT"}); + public void testListInfoTypes() throws Exception { + Metadata.main(new String[] { + "-language", "en-US", + "-filter", "supported_by=INSPECT" + }); String output = bout.toString(); assertTrue(output.contains("Name") && output.contains("Display name")); } - @After - public void tearDown() { - System.setOut(null); - } } From f4b54b2fd531e5ba73cad6672b032aa9ccf24a4a Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 08:40:31 -0700 Subject: [PATCH 08/23] Updated 
QuickStart tests and samples. --- dlp/src/main/java/com/example/dlp/QuickStart.java | 8 +++++++- dlp/src/test/java/com/example/dlp/QuickStartIT.java | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java b/dlp/src/main/java/com/example/dlp/QuickStart.java index 375a3148823..3ead1b1b4d6 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -16,6 +16,7 @@ package com.example.dlp; +import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.privacy.dlp.v2.ByteContentItem; import com.google.privacy.dlp.v2.ContentItem; @@ -26,6 +27,7 @@ import com.google.privacy.dlp.v2.InspectContentResponse; import com.google.privacy.dlp.v2.InspectResult; import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ProjectName; import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.List; @@ -59,7 +61,9 @@ public static void main(String[] args) throws Exception { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerItem(maxFindings).build(); + InspectConfig.FindingLimits.newBuilder() + .setMaxFindingsPerItem(maxFindings) + .build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -76,8 +80,10 @@ public static void main(String[] args) throws Exception { .build(); ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + String projectId = ServiceOptions.getDefaultProjectId(); InspectContentRequest request = InspectContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) .setItem(contentItem) .build(); diff --git a/dlp/src/test/java/com/example/dlp/QuickStartIT.java b/dlp/src/test/java/com/example/dlp/QuickStartIT.java index 2e6c16f5802..dee02ce404d 
100644 --- a/dlp/src/test/java/com/example/dlp/QuickStartIT.java +++ b/dlp/src/test/java/com/example/dlp/QuickStartIT.java @@ -16,7 +16,9 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -47,7 +49,8 @@ public void setUp() { public void testQuickStart() throws Exception { QuickStart.main(new String[] {}); String output = bout.toString(); - assertTrue(output.contains("US_MALE_NAME")); + + assertThat(output, containsString("US_MALE_NAME")); } @After From d88015d9f62356160cadb49c1ef17dd2b7b05700 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 09:47:21 -0700 Subject: [PATCH 09/23] Updated Redact samples and tests. --- dlp/src/main/java/com/example/dlp/Redact.java | 6 ++- .../test/java/com/example/dlp/RedactIT.java | 46 ++++++++----------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index 63ff2e5e5e5..0bda7dac2c0 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -22,6 +22,7 @@ import com.google.privacy.dlp.v2.InfoType; import com.google.privacy.dlp.v2.InspectConfig; import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.RedactImageRequest; import com.google.privacy.dlp.v2.RedactImageResponse; import com.google.protobuf.ByteString; @@ -110,7 +111,7 @@ private static void redactImage( RedactImageRequest redactImageRequest = RedactImageRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .addAllImageRedactionConfigs(imageRedactionConfigs) .setByteItem(byteContentItem) .setInspectConfig(inspectConfig) @@ -142,11 +143,12 @@ public static void main(String[] args) 
throws Exception { commandLineOptions.addOption(infoTypesOption); Option inputFilePathOption = - Option.builder("o").hasArg(true).longOpt("inputFilePath").required(false).build(); + Option.builder("f").hasArg(true).longOpt("inputFilePath").required(false).build(); commandLineOptions.addOption(inputFilePathOption); Option outputFilePathOption = Option.builder("o").hasArg(true).longOpt("outputFilePath").required(false).build(); + commandLineOptions.addOption(outputFilePathOption); Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java index 5c68b9ebd1c..6cd277aeef0 100644 --- a/dlp/src/test/java/com/example/dlp/RedactIT.java +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -17,7 +17,10 @@ package com.example.dlp; import static junit.framework.TestCase.assertFalse; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -47,36 +50,27 @@ public void setUp() { } @Test - public void testInfoTypesInStringAreReplaced() throws Exception { - String text = - "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; - Redact.main(new String[] {"-s", text, "-r", "_REDACTED_"}); - String output = bout.toString(); - assertTrue(output.contains("My phone number is _REDACTED_ and my email address is _REDACTED_")); - } - - @Ignore // TODO: b/69461298 - @Test - public void testInfoTypesInImageAreReplaced() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - // confirm that current data contains info types - File file = new File(classLoader.getResource("test.png").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); - String output = 
bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); - bout.reset(); - - String outputFilePath = "output.png"; + public void testRedactImage() throws Exception { + // InspectIT Tests verify original has PII present + String outputFilePath = "src/test/resources/output.png"; + // Restrict phone number, but not email Redact.main( new String[] { - "-f", file.getAbsolutePath(), "-infoTypes", "PHONE_NUMBER", "-o", outputFilePath + "-f", "src/test/resources/test.png", + "-infoTypes", "PHONE_NUMBER", + "-o", outputFilePath }); - Inspect.main(new String[] {"-f", outputFilePath}); - output = bout.toString(); - assertFalse(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + bout.reset(); + + // Verify that phone_number is missing but email is present + Inspect.main(new String[] { + "-f", outputFilePath, + "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); + String output = bout.toString(); + assertThat(output, not(containsString("PHONE_NUMBER"))); + assertThat(output, containsString("EMAIL_ADDRESS")); } @After From d44100cdcb925c98104d621d88c7dc08d310bdc1 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 10:12:17 -0700 Subject: [PATCH 10/23] Updated RiskAnalysis. 
--- .../java/com/example/dlp/RiskAnalysis.java | 50 ++++++++------- .../java/com/example/dlp/RiskAnalysisIT.java | 61 +++++++++++-------- 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 8e4a0c57f46..3996d51f637 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -40,9 +40,12 @@ import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig; import com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig; import com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig; +import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.RiskAnalysisJobConfig; import com.google.privacy.dlp.v2.Value; import com.google.privacy.dlp.v2.ValueFrequency; +import com.google.pubsub.v1.ProjectSubscriptionName; +import com.google.pubsub.v1.ProjectTopicName; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; @@ -112,8 +115,8 @@ private static void calculateNumericalStats( CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setRiskJob(riskAnalysisJobConfig) - .setParent(projectId) .build(); DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); @@ -143,34 +146,35 @@ private static void calculateNumericalStats( } } + // [START wait_on_dlp_job_completion] + // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber private static void waitOnJobCompletion( String projectId, String subscriptionId, String dlpJobName) throws InterruptedException, ExecutionException { - // [START wait_on_dlp_job_completion] // wait for job completion final SettableApiFuture done = SettableApiFuture.create(); -/* TODO // setup a Pub/Sub subscriber to listen on the job completion status + // setup a Pub/Sub subscriber to listen on the job 
completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - ackReplyConsumer.ack(); - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - } - }) - .build();*/ + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + ackReplyConsumer.ack(); + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + } + }) + .build(); // wait for job completion done.get(); - // [END wait_on_dlp_job_completion] } + // [END wait_on_dlp_job_completion] private static void calculateCategoricalStats( String projectId, @@ -213,9 +217,11 @@ private static void calculateCategoricalStats( PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build(); - String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId); - PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder() + .setTopic(topicName.toString()) + .build(); // create /action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); @@ -229,8 +235,8 @@ private static void calculateCategoricalStats( CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setRiskJob(riskAnalysisJobConfig) - .setParent(projectId) .build(); DlpJob dlpJob = 
dlpServiceClient.createDlpJob(createDlpJobRequest); @@ -329,8 +335,8 @@ private static void calculateKAnonymity( CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setRiskJob(riskAnalysisJobConfig) - .setParent(projectId) .build(); DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); @@ -438,8 +444,8 @@ private static void calculateLDiversity( CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setRiskJob(riskAnalysisJobConfig) - .setParent(projectId) .build(); DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index a6eb0491cb0..7727d6e4ffe 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -16,6 +16,7 @@ package com.example.dlp; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -24,6 +25,7 @@ import java.util.regex.Pattern; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -36,6 +38,10 @@ public class RiskAnalysisIT { private ByteArrayOutputStream bout; private PrintStream out; + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; + @Before public void setUp() { bout = new ByteArrayOutputStream(); @@ -47,10 +53,15 @@ public void setUp() { } @Test + @Ignore // TODO: Fix Pubsub public void testNumericalStats() throws Exception { RiskAnalysis.main( new String[] { - "-n", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-columnName", "Age" + "-n", 
"-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-columnName", "Age", + "-topicId", topicId, + "-subscriptionId", subscriptionId }); String output = bout.toString(); assertTrue( @@ -60,34 +71,32 @@ public void testNumericalStats() throws Exception { } @Test + @Ignore // TODO: Fix Pubsub public void testCategoricalStats() throws Exception { RiskAnalysis.main( new String[] { "-c", - "-datasetId", - "integration_tests_dlp", - "-tableId", - "harmful", - "-columnName", - "Mystery" + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-columnName", "Mystery", + "-topicId", topicId, + "-subscriptionId", subscriptionId }); String output = bout.toString(); assertTrue(Pattern.compile("Most common value occurs \\d time\\(s\\)").matcher(output).find()); } @Test + @Ignore // TODO: Fix Pubsub public void testKAnonymity() throws Exception { - RiskAnalysis.main( - new String[] { - "-k", - "-datasetId", - "integration_tests_dlp", - "-tableId", - "harmful", - "-quasiIdColumnNames", - "Age", - "Mystery" - }); + RiskAnalysis.main(new String[]{ + "-k", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-quasiIdColumnNames", "Age", "Mystery", + "-topicId", topicId, + "-subscriptionId", subscriptionId + }); String output = bout.toString(); assertTrue(Pattern.compile("Bucket size range: \\[\\d, \\d\\]").matcher(output).find()); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); @@ -95,19 +104,17 @@ public void testKAnonymity() throws Exception { } @Test + @Ignore // TODO: Fix Pubsub public void testLDiversity() throws Exception { RiskAnalysis.main( new String[] { "-l", - "-datasetId", - "integration_tests_dlp", - "-tableId", - "harmful", - "-sensitiveAttribute", - "Name", - "-quasiIdColumnNames", - "Age", - "Mystery" + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-sensitiveAttribute", "Name", + "-quasiIdColumnNames", "Age", "Mystery", + "-topicId", topicId, + "-subscriptionId", subscriptionId }); 
String output = bout.toString(); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); From 268796c0750d2f3591b42730cb5b057dc6a88159 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 12:41:36 -0700 Subject: [PATCH 11/23] Update Template samples. --- .../main/java/com/example/dlp/Templates.java | 49 +++++----- .../java/com/example/dlp/TemplatesIT.java | 95 +++++++++++++++++++ 2 files changed, 119 insertions(+), 25 deletions(-) create mode 100644 dlp/src/test/java/com/example/dlp/TemplatesIT.java diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java index fbd33e58c4e..41709429a4e 100644 --- a/dlp/src/main/java/com/example/dlp/Templates.java +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -25,6 +25,7 @@ import com.google.privacy.dlp.v2.Likelihood; import com.google.privacy.dlp.v2.ListInspectTemplatesRequest; import com.google.privacy.dlp.v2.ListInspectTemplatesResponse; +import com.google.privacy.dlp.v2.ProjectName; import java.util.ArrayList; import java.util.List; import org.apache.commons.cli.CommandLine; @@ -41,20 +42,19 @@ public class Templates { /** * [START dlp_create_template] * + * @param displayName (Optional) The human-readable name to give the template * @param projectId Google Cloud Project ID to call the API under * @param templateId (Optional) The name of the template to be created - * @param displayName (Optional) The human-readable name to give the template * @param infoTypeList The infoTypes of information to match - * @param includeQuote Whether to include the matching string * @param minLikelihood The minimum likelihood required before returning a match * @param maxFindings The maximum number of findings to report per request (0 = server maximum) */ private static void createInspectTemplate( - String projectId, - String templateId, String displayName, + String templateId, + String description, + String projectId, List infoTypeList, - boolean 
includeQuote, Likelihood minLikelihood, int maxFindings) { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -67,7 +67,6 @@ private static void createInspectTemplate( InspectConfig.newBuilder() .addAllInfoTypes(infoTypeList) .setMinLikelihood(minLikelihood) - .setIncludeQuote(includeQuote) .setLimits(findingLimits) .build(); @@ -75,11 +74,12 @@ private static void createInspectTemplate( InspectTemplate.newBuilder() .setInspectConfig(inspectConfig) .setDisplayName(displayName) + .setDescription(description) .build(); CreateInspectTemplateRequest createInspectTemplateRequest = CreateInspectTemplateRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .setInspectTemplate(inspectTemplate) .setTemplateId(templateId) .build(); @@ -105,18 +105,20 @@ private static void listInspectTemplates(String projectId) { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ListInspectTemplatesRequest request = - ListInspectTemplatesRequest.newBuilder().setParent(projectId).setPageSize(1).build(); + ListInspectTemplatesRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setPageSize(1).build(); ListInspectTemplatesPagedResponse response = dlpServiceClient.listInspectTemplates(request); ListInspectTemplatesPage page = response.getPage(); ListInspectTemplatesResponse templatesResponse = page.getResponse(); for (InspectTemplate template : templatesResponse.getInspectTemplatesList()) { - System.out.printf("Template name: %s", template.getName()); + System.out.printf("Template name: %s\n", template.getName()); if (template.getDisplayName() != null) { - System.out.printf("Template display name: %s", template.getDisplayName()); - System.out.printf("Template create time: %s", template.getCreateTime()); - System.out.printf("Template update time: %s", template.getUpdateTime()); + System.out.printf("Template display name: %s \n", template.getDisplayName()); + System.out.printf("Template create 
time: %s \n", template.getCreateTime()); + System.out.printf("Template update time: %s \n", template.getUpdateTime()); // print inspection config InspectConfig inspectConfig = template.getInspectConfig(); @@ -167,13 +169,13 @@ public static void main(String[] args) throws Exception { OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); - Option createOption = new Option("c", "create", true, "Create inspect template"); + Option createOption = new Option("c", "create", false, "Create inspect template"); optionsGroup.addOption(createOption); - Option listOption = new Option("l", "list", true, "List inspect templates"); + Option listOption = new Option("l", "list", false, "List inspect templates"); optionsGroup.addOption(listOption); - Option deleteOption = new Option("d", "delete", true, "Delete inspect template"); + Option deleteOption = new Option("d", "delete", false, "Delete inspect template"); optionsGroup.addOption(deleteOption); commandLineOptions.addOptionGroup(optionsGroup); @@ -190,6 +192,9 @@ public static void main(String[] args) throws Exception { Option templateIdOption = Option.builder("templateId").hasArg(true).required(false).build(); commandLineOptions.addOption(templateIdOption); + Option templateDescription = Option.builder("description").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateDescription); + Option templateDisplayNameOption = Option.builder("displayName").hasArg(true).required(false).build(); commandLineOptions.addOption(templateDisplayNameOption); @@ -219,6 +224,7 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption(createOption.getOpt())) { String templateId = cmd.getOptionValue(templateIdOption.getOpt()); String displayName = cmd.getOptionValue(templateDisplayNameOption.getOpt()); + String description = cmd.getOptionValue(templateDescription.getOpt()); Likelihood minLikelihood = Likelihood.valueOf( @@ -232,17 +238,10 @@ public static void main(String[] 
args) throws Exception { infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); } } - Boolean includeQuote = - Boolean.valueOf(cmd.getOptionValue(includeQuoteOption.getOpt(), "false")); - int maxFindings = Integer.valueOf(maxFindingsOption.getOpt(), 0); + int maxFindings = Integer.valueOf(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); createInspectTemplate( - projectId, - templateId, - displayName, - infoTypesList, - includeQuote, - minLikelihood, - maxFindings); + displayName, templateId, description, projectId, + infoTypesList, minLikelihood, maxFindings); } else if (cmd.hasOption(listOption.getOpt())) { listInspectTemplates(projectId); diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java new file mode 100644 index 00000000000..912e71fdd14 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -0,0 +1,95 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +// CHECKSTYLE OFF: AbbreviationAsWordInName +public class TemplatesIT { + // CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + @Test + public void testCreateInspectTemplate() throws Exception { + Templates.main(new String[] { + "-c", + "-displayName", String.format("test-name-%s", new Date()), + "-templateId", String.format("template%s", System.currentTimeMillis()), + "-description", String.format("description-%s", new Date()) + }); + String output = bout.toString(); + assertThat(output, containsString("Template created: ")); + } + + @Test + public void testListInspectemplate() throws Exception { + Templates.main(new String[] { + "-l" + }); + String output = bout.toString(); + assertThat(output, containsString("Template name:")); + } + + @Test + public void testDeleteInspectTemplate() throws Exception { + // Extract a Template ID + Templates.main(new String[] { "-l" }); + String output = bout.toString(); + Matcher templateIds = Pattern.compile("template[0-9]+").matcher(output); + assertTrue(templateIds.find()); + String templateId = templateIds.group(0); + bout.reset(); + 
Templates.main(new String[] { + "-d", + "-templateId", templateId + }); + output = bout.toString(); + assertThat(output, containsString("Deleted template:")); + } + +} From 3dc8bad189771680d8f3c701f883097b0becf631 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 13:18:29 -0700 Subject: [PATCH 12/23] Update trigger tests. --- .../main/java/com/example/dlp/Triggers.java | 62 +++++++---- .../test/java/com/example/dlp/TriggersIT.java | 101 ++++++++++++++++++ 2 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 dlp/src/test/java/com/example/dlp/TriggersIT.java diff --git a/dlp/src/main/java/com/example/dlp/Triggers.java b/dlp/src/main/java/com/example/dlp/Triggers.java index 6eb36e0041c..d766be73c0f 100644 --- a/dlp/src/main/java/com/example/dlp/Triggers.java +++ b/dlp/src/main/java/com/example/dlp/Triggers.java @@ -16,6 +16,7 @@ package com.example.dlp; +import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.privacy.dlp.v2.CloudStorageOptions; import com.google.privacy.dlp.v2.CreateJobTriggerRequest; @@ -26,6 +27,8 @@ import com.google.privacy.dlp.v2.JobTrigger; import com.google.privacy.dlp.v2.Likelihood; import com.google.privacy.dlp.v2.ListJobTriggersRequest; +import com.google.privacy.dlp.v2.ProjectJobTriggerName; +import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.Schedule; import com.google.privacy.dlp.v2.StorageConfig; import com.google.protobuf.Duration; @@ -50,7 +53,6 @@ public class Triggers { * @param triggerId (Optional) name of the trigger to be created * @param displayName (Optional) display name for the trigger to be created * @param description (Optional) description for the trigger to be created - * @param gcsUrl URL path to GCS bucket, eg. gs://my-bucket-name * @param scanPeriod How often to wait between scans, in days (minimum = 1 day) * @param infoTypes infoTypes of information to match eg. 
InfoType.PHONE_NUMBER, * InfoType.EMAIL_ADDRESS @@ -62,21 +64,24 @@ private static void createTrigger( String triggerId, String displayName, String description, - String gcsUrl, + String bucketName, + String fileName, int scanPeriod, List infoTypes, Likelihood minLikelihood, int maxFindings, - String projectId) { + String projectId) throws Exception { // instantiate a client - try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + DlpServiceClient dlpServiceClient = DlpServiceClient.create(); + try { - CloudStorageOptions.FileSet fileSet = - CloudStorageOptions.FileSet.newBuilder().setUrl(gcsUrl).build(); CloudStorageOptions cloudStorageOptions = - CloudStorageOptions.newBuilder().setFileSet(fileSet).build(); - + CloudStorageOptions.newBuilder() + .setFileSet( + CloudStorageOptions.FileSet.newBuilder() + .setUrl("gs://" + bucketName + "/" + fileName)) + .build(); StorageConfig storageConfig = StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); @@ -110,16 +115,17 @@ private static void createTrigger( .addTriggers(trigger) .build(); + System.out.println("Pause"); // Create scan request CreateJobTriggerRequest createJobTriggerRequest = CreateJobTriggerRequest.newBuilder() - .setParent(projectId) + .setParent(ProjectName.of(projectId).toString()) .setJobTrigger(jobTrigger) .build(); JobTrigger createdJobTrigger = dlpServiceClient.createJobTrigger(createJobTriggerRequest); - System.out.println("Created Trigger: " + createdJobTrigger.getDisplayName()); + System.out.println("Created Trigger: " + createdJobTrigger.getName()); } catch (Exception e) { System.out.println("Error creating trigger :" + e.getMessage()); } @@ -135,7 +141,8 @@ private static void listTriggers(String projectId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ListJobTriggersRequest listJobTriggersRequest = - ListJobTriggersRequest.newBuilder().setParent(projectId).build(); + 
ListJobTriggersRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()).build(); DlpServiceClient.ListJobTriggersPagedResponse response = dlpServiceClient.listJobTriggers(listJobTriggersRequest); response @@ -172,11 +179,14 @@ private static void listTriggers(String projectId) { private static void deleteTrigger(String projectId, String triggerId) { // Instantiates a client // triggerName to provided as projects/project-id/jobTriggers/triggerId - String triggerName = String.format("projects/%s/jobTriggers/%s", projectId, triggerId); + + ProjectJobTriggerName triggerName = ProjectJobTriggerName.of(projectId, triggerId); try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { DeleteJobTriggerRequest deleteJobTriggerRequest = - DeleteJobTriggerRequest.newBuilder().setName(triggerName).build(); + DeleteJobTriggerRequest.newBuilder().setName(triggerName.toString()).build(); dlpServiceClient.deleteJobTrigger(deleteJobTriggerRequest); + + System.out.println("Trigger deleted: " + triggerName.toString()); } catch (Exception e) { System.out.println("Error deleting trigger :" + e.getMessage()); } @@ -191,20 +201,23 @@ public static void main(String[] args) throws Exception { optionsGroup.setRequired(true); Option createTriggerOption = - new Option("c", "create", true, "Create trigger to scan a GCS bucket"); + new Option("c", "create", false, "Create trigger to scan a GCS bucket"); optionsGroup.addOption(createTriggerOption); - Option listTriggersOption = new Option("l", "list", true, "List triggers"); + Option listTriggersOption = new Option("l", "list", false, "List triggers"); optionsGroup.addOption(listTriggersOption); - Option deleteTriggerOption = new Option("d", "delete", true, "Delete trigger"); + Option deleteTriggerOption = new Option("d", "delete", false, "Delete trigger"); optionsGroup.addOption(deleteTriggerOption); Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); - Option gcsUrlOption = 
Option.builder("gcsUrl").hasArg(true).required(false).build(); - commandLineOptions.addOption(gcsUrlOption); + Option bucketNameOption = Option.builder("bucketName").hasArg(true).required(false).build(); + commandLineOptions.addOption(bucketNameOption); + + Option gcsFileNameOption = Option.builder("fileName").hasArg(true).required(false).build(); + commandLineOptions.addOption(gcsFileNameOption); Option minLikelihoodOption = Option.builder("minLikelihood").hasArg(true).required(false).build(); @@ -223,10 +236,14 @@ public static void main(String[] args) throws Exception { commandLineOptions.addOption(projectIdOption); Option triggerIdOption = Option.builder("triggerId").hasArg(true).required(false).build(); + commandLineOptions.addOption(triggerIdOption); Option displayNameOption = Option.builder("displayName").hasArg(true).required(false).build(); + commandLineOptions.addOption(displayNameOption); Option descriptionOption = Option.builder("description").hasArg(true).required(false).build(); + commandLineOptions.addOption(descriptionOption); Option scanPeriodOption = Option.builder("scanPeriod").hasArg(true).required(false).build(); + commandLineOptions.addOption(scanPeriodOption); CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); @@ -241,7 +258,8 @@ public static void main(String[] args) throws Exception { return; } - String projectId = cmd.getOptionValue(projectIdOption.getOpt()); + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); if (cmd.hasOption("c")) { Likelihood minLikelihood = Likelihood.valueOf( @@ -251,7 +269,8 @@ public static void main(String[] args) throws Exception { String triggerId = cmd.getOptionValue(triggerIdOption.getOpt()); String displayName = cmd.getOptionValue(displayNameOption.getOpt(), ""); String description = cmd.getOptionValue(descriptionOption.getOpt(), ""); - String gcsUrl = cmd.getOptionValue(gcsUrlOption.getOpt()); + String 
bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); + String fileName = cmd.getOptionValue(gcsFileNameOption.getOpt()); int scanPeriod = Integer.valueOf(cmd.getOptionValue(scanPeriodOption.getOpt())); List infoTypesList = new ArrayList<>(); if (cmd.hasOption(infoTypesOption.getOpt())) { @@ -265,7 +284,8 @@ public static void main(String[] args) throws Exception { triggerId, displayName, description, - gcsUrl, + bucketName, + fileName, scanPeriod, infoTypesList, minLikelihood, diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java new file mode 100644 index 00000000000..a8045d823d8 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -0,0 +1,101 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +// CHECKSTYLE OFF: AbbreviationAsWordInName +public class TriggersIT { + // CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + @Test + public void testCreateTrigger() throws Exception { + Triggers.main(new String[] { + "-c", + "-displayName", String.format("trigger-name-%s", new Date()), + "-triggerId", String.format("trigger%s", System.currentTimeMillis()), + "-description", String.format("description-%s", new Date()), + "-bucketName", bucketName, + "-fileName", "test.txt", + "-scanPeriod", "1" + }); + String output = bout.toString(); + assertThat(output, containsString("Created Trigger:")); + } + + @Test + public void testListTrigger() throws Exception { + Triggers.main(new String[] { + "-l" + }); + String output = bout.toString(); + assertThat(output, containsString("Trigger:")); + } + + @Test + public void testDeleteTrigger() throws Exception { + Triggers.main(new String[] { "-l" }); + String output = bout.toString(); + 
Matcher templateIds = Pattern.compile("(?<=jobTriggers/)[0-9]+").matcher(output); + assertTrue(templateIds.find()); + String triggerId = templateIds.group(0); + bout.reset(); + Triggers.main(new String[] { + "-d", + "-triggerId", triggerId, + }); + output = bout.toString(); + assertThat(output, containsString("Trigger deleted:")); + } + +} From 678fd0b60a021626be919f5437d4c40a9ee4675d Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 13:41:30 -0700 Subject: [PATCH 13/23] Make Checkstyle Happy Again. --- .../com/example/dlp/DeIdentification.java | 6 +- .../main/java/com/example/dlp/Inspect.java | 70 +++++++------- dlp/src/main/java/com/example/dlp/Jobs.java | 1 + dlp/src/main/java/com/example/dlp/Redact.java | 22 ++--- .../java/com/example/dlp/RiskAnalysis.java | 8 +- .../main/java/com/example/dlp/Templates.java | 28 +++--- .../com/example/dlp/DeIdentificationIT.java | 2 +- .../test/java/com/example/dlp/InspectIT.java | 6 +- dlp/src/test/java/com/example/dlp/JobsIT.java | 91 +++++++++++++++++++ .../java/com/example/dlp/QuickStartIT.java | 1 - .../test/java/com/example/dlp/RedactIT.java | 4 - .../java/com/example/dlp/RiskAnalysisIT.java | 3 - .../java/com/example/dlp/TemplatesIT.java | 2 +- .../test/java/com/example/dlp/TriggersIT.java | 3 +- 14 files changed, 163 insertions(+), 84 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 159d66ac326..20444d3f9f5 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -280,7 +280,7 @@ private static void deidentifyWithDateShift( // (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key // If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set // String keyName = - // 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + // 
'projects/PROJECT/locations/LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; // (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates // This key should be encrypted using the Cloud KMS key specified above @@ -303,8 +303,6 @@ private static void deidentifyWithDateShift( "You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!"); } - DateShiftConfig dateShiftConfig = dateShiftConfigBuilder.build(); - // Read and parse the CSV file // The first row of the file must specify column names, and all other rows // Path inputCsvFile = Paths.get("/path/to/file.csv"); @@ -334,6 +332,8 @@ private static void deidentifyWithDateShift( .map(field -> FieldId.newBuilder().setName(field).build()) .collect(Collectors.toList()); + DateShiftConfig dateShiftConfig = dateShiftConfigBuilder.build(); + FieldTransformation fieldTransformation = FieldTransformation.newBuilder() .addAllFields(dateFieldIds) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 0ea44d63412..8f7c5367d95 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -20,8 +20,6 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.cloud.pubsub.v1.Subscriber; -import com.google.privacy.dlp.v2.ProjectName; -import com.google.pubsub.v1.ProjectSubscriptionName; import com.google.privacy.dlp.v2.Action; import com.google.privacy.dlp.v2.BigQueryOptions; import com.google.privacy.dlp.v2.BigQueryTable; @@ -44,10 +42,11 @@ import com.google.privacy.dlp.v2.KindExpression; import com.google.privacy.dlp.v2.Likelihood; import com.google.privacy.dlp.v2.PartitionId; +import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.StorageConfig; import com.google.protobuf.ByteString; +import com.google.pubsub.v1.ProjectSubscriptionName; import com.google.pubsub.v1.ProjectTopicName; -import 
com.google.pubsub.v1.TopicName; import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Paths; @@ -157,19 +156,14 @@ private static void inspectFile( ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.TEXT_UTF8; - switch (mimeType) { - case "image/jpeg": - bytesType = ByteContentItem.BytesType.IMAGE_JPEG; - break; - case "image/bmp": - bytesType = ByteContentItem.BytesType.IMAGE_BMP; - break; - case "image/png": - bytesType = ByteContentItem.BytesType.IMAGE_PNG; - break; - case "image/svg": - bytesType = ByteContentItem.BytesType.IMAGE_SVG; - break; + if (mimeType.equals("image/jpeg")) { + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + } else if (mimeType.equals("image/bmp")) { + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + } else if (mimeType.equals("image/png")) { + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + } else if (mimeType.equals("image/svg")) { + bytesType = ByteContentItem.BytesType.IMAGE_SVG; } byte[] data = Files.readAllBytes(Paths.get(filePath)); @@ -325,18 +319,18 @@ private static void waitOnJobCompletion( // setup a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - ackReplyConsumer.ack(); - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + ackReplyConsumer.ack(); + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + } + }) .build(); // wait for job completion @@ -356,8 +350,7 @@ private static void 
waitOnJobCompletion( * @param maxFindings max number of findings * @param topicId Google Cloud Pub/Sub topic to notify job status updates * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status - * updates - * @throws Exception + * updates */ private static void inspectDatastore( String projectId, @@ -367,8 +360,7 @@ private static void inspectDatastore( List infoTypes, int maxFindings, String topicId, - String subscriptionId) - throws Exception { + String subscriptionId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -437,6 +429,8 @@ private static void inspectDatastore( } else { System.out.println("No findings."); } + } catch (Exception e) { + System.out.println("inspectDatastore Problems: " + e.getMessage()); } } // [END dlp_inspect_datastore] @@ -449,10 +443,9 @@ private static void inspectDatastore( * @param tableId The ID of the table to inspect, e.g. 'my_table' * @param minLikelihood The minimum likelihood required before returning a match * @param infoTypes The infoTypes of information to match - * @param maxFindings - * @param topicId - * @param subscriptionId - * @throws Exception + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param topicId Topic ID for pubsub. + * @param subscriptionId Subscription ID for pubsub. 
*/ private static void inspectBigquery( String projectId, @@ -462,8 +455,7 @@ private static void inspectBigquery( List infoTypes, int maxFindings, String topicId, - String subscriptionId) - throws Exception { + String subscriptionId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { // Reference to the BigQuery table @@ -535,6 +527,8 @@ private static void inspectBigquery( } else { System.out.println("No findings."); } + } catch (Exception e) { + System.out.println("inspectBigquery Problems: " + e.getMessage()); } } // [END dlp_inspect_bigquery] diff --git a/dlp/src/main/java/com/example/dlp/Jobs.java b/dlp/src/main/java/com/example/dlp/Jobs.java index 7d6a499870d..c039581015c 100644 --- a/dlp/src/main/java/com/example/dlp/Jobs.java +++ b/dlp/src/main/java/com/example/dlp/Jobs.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package com.example.dlp; import com.google.cloud.ServiceOptions; diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index 0bda7dac2c0..9e32349be7a 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -62,20 +62,16 @@ private static void redactImage( } ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; - switch (mimeType) { - case "image/jpeg": - bytesType = ByteContentItem.BytesType.IMAGE_JPEG; - break; - case "image/bmp": - bytesType = ByteContentItem.BytesType.IMAGE_BMP; - break; - case "image/png": - bytesType = ByteContentItem.BytesType.IMAGE_PNG; - break; - case "image/svg": - bytesType = ByteContentItem.BytesType.IMAGE_SVG; - break; + if (mimeType.equals("image/jpeg")) { + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + } else if (mimeType.equals("image/bmp")) { + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + } else if (mimeType.equals("image/png")) { 
+ bytesType = ByteContentItem.BytesType.IMAGE_PNG; + } else if (mimeType.equals("image/svg")) { + bytesType = ByteContentItem.BytesType.IMAGE_SVG; } + byte[] data = Files.readAllBytes(Paths.get(filePath)); // The minimum likelihood required before redacting a match diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 3996d51f637..27328c135b3 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -352,9 +352,9 @@ private static void calculateKAnonymity( System.out.println("Job status: " + completedJob.getState()); AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); - KAnonymityResult kAnonymityResult = riskDetails.getKAnonymityResult(); + KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult(); for (KAnonymityHistogramBucket result : - kAnonymityResult.getEquivalenceClassHistogramBucketsList()) { + kanonymityResult.getEquivalenceClassHistogramBucketsList()) { System.out.println( "Bucket size range: [" + result.getEquivalenceClassSizeLowerBound() @@ -461,9 +461,9 @@ private static void calculateLDiversity( System.out.println("Job status: " + completedJob.getState()); AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); - LDiversityResult lDiversityResult = riskDetails.getLDiversityResult(); + LDiversityResult ldiversityResult = riskDetails.getLDiversityResult(); for (LDiversityHistogramBucket result : - lDiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) { + ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) { for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) { List quasiIdValues = bucket diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java index 41709429a4e..e697104a651 100644 --- a/dlp/src/main/java/com/example/dlp/Templates.java +++ 
b/dlp/src/main/java/com/example/dlp/Templates.java @@ -1,15 +1,19 @@ -/** - * Copyright 2018, Google, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you - * may not use this file except in compliance with the License. You may obtain a copy of the License - * at +/* + * Copyright 2018 Google Inc. * - *

http://www.apache.org/licenses/LICENSE-2.0 + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - *

Unless required by applicable law or agreed to in writing, software distributed under the - * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing permissions and + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and * limitations under the License. */ + package com.example.dlp; import com.google.cloud.ServiceOptions; @@ -162,9 +166,6 @@ private static void deleteInspectTemplate(String projectId, String templateId) { /** Command line application to create, list and delete DLP inspect templates. */ public static void main(String[] args) throws Exception { - Options commandLineOptions = new Options(); - - Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); @@ -178,11 +179,14 @@ public static void main(String[] args) throws Exception { Option deleteOption = new Option("d", "delete", false, "Delete inspect template"); optionsGroup.addOption(deleteOption); + Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + Option minLikelihoodOption = Option.builder("minLikelihood").hasArg(true).required(false).build(); - commandLineOptions.addOption(minLikelihoodOption); Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java 
b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index e62b113e464..68c56f3f39d 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -89,7 +89,7 @@ public void testDeidentifyWithDateShift() throws Exception { "-d", "-inputCsvPath", "src/test/resources/dates.csv", "-outputCsvPath", "src/test/resources/results.temp.csv", - "-dateFields", "birth_date,register_date", + "-dateFields", "birth_date,register_date", "-lowerBoundDays", "5", "-upperBoundDays", "5", "-contextField", "name", diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 5111bf11b09..38cb33db3fb 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -20,7 +20,6 @@ import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.PrintStream; import org.junit.After; import org.junit.Before; @@ -32,8 +31,8 @@ @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName public class InspectIT { - // CHECKSTYLE ON: AbbreviationAsWordInName + private ByteArrayOutputStream bout; private PrintStream out; @@ -50,7 +49,8 @@ public class InspectIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + System.setOut(out); + // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } diff --git a/dlp/src/test/java/com/example/dlp/JobsIT.java b/dlp/src/test/java/com/example/dlp/JobsIT.java index 31fca6758b3..e13304f1c8b 100644 --- a/dlp/src/test/java/com/example/dlp/JobsIT.java +++ b/dlp/src/test/java/com/example/dlp/JobsIT.java @@ -1,5 +1,96 @@ +/* + * Copyright 2018 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +// CHECKSTYLE OFF: AbbreviationAsWordInName +@RunWith(JUnit4.class) public class JobsIT { + // CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + private static final Pattern jobIdPattern = Pattern.compile("projects/.*/dlpJobs/i-\\d+"); + + // Update to Google Cloud Storage path containing test.txt + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + + @Test + public void testListJobs() throws Exception { + Jobs.main(new String[] { + "-l", + "-filter", "state=DONE" + }); + String 
output = bout.toString(); + Matcher matcher = jobIdPattern.matcher(bout.toString()); + assertTrue("List must contain results.", matcher.find()); + } + + @Test + public void testDeleteJobs() throws Exception { + // Get a list of JobIds, and extract one to delete + Jobs.main(new String[] { "-l", "-filter", "state=DONE"}); + String jobList = bout.toString(); + Matcher matcher = jobIdPattern.matcher(jobList); + assertTrue("List must contain results.", matcher.find()); + // Extract just the ID + String jobId = matcher.group(0).split("/")[3]; + bout.reset(); + + // Delete the Job + Jobs.main(new String[] { + "-d", + "-jobId", jobId + }); + String output = bout.toString(); + assertThat(output, containsString("Job deleted successfully.")); + } + } diff --git a/dlp/src/test/java/com/example/dlp/QuickStartIT.java b/dlp/src/test/java/com/example/dlp/QuickStartIT.java index dee02ce404d..5c22c64a781 100644 --- a/dlp/src/test/java/com/example/dlp/QuickStartIT.java +++ b/dlp/src/test/java/com/example/dlp/QuickStartIT.java @@ -19,7 +19,6 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.PrintStream; diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java index 6cd277aeef0..e82dcfc6163 100644 --- a/dlp/src/test/java/com/example/dlp/RedactIT.java +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -16,19 +16,15 @@ package com.example.dlp; -import static junit.framework.TestCase.assertFalse; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.PrintStream; import org.junit.After; 
import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 7727d6e4ffe..5e164e3fe1a 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -16,7 +16,6 @@ package com.example.dlp; -import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -38,7 +37,6 @@ public class RiskAnalysisIT { private ByteArrayOutputStream bout; private PrintStream out; - private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; private String topicId = "dlp-tests"; private String subscriptionId = "dlp-test"; @@ -46,7 +44,6 @@ public class RiskAnalysisIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java index 912e71fdd14..80627826727 100644 --- a/dlp/src/test/java/com/example/dlp/TemplatesIT.java +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -35,7 +35,7 @@ @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName public class TemplatesIT { - // CHECKSTYLE ON: AbbreviationAsWordInName + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java index a8045d823d8..27560361197 100644 --- 
a/dlp/src/test/java/com/example/dlp/TriggersIT.java +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -35,7 +35,8 @@ @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName public class TriggersIT { - // CHECKSTYLE ON: AbbreviationAsWordInName + + //CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; From f767a9743bfdd5818ba2fba8f8672abbb57dd34a Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent Date: Thu, 15 Mar 2018 14:59:27 -0700 Subject: [PATCH 14/23] Fix (and ignore) tests using pubsub. --- .../main/java/com/example/dlp/Inspect.java | 23 ++++++------- .../java/com/example/dlp/RiskAnalysis.java | 19 +++++++---- .../com/example/dlp/DeIdentificationIT.java | 2 +- .../test/java/com/example/dlp/InspectIT.java | 32 ++++++++++--------- dlp/src/test/java/com/example/dlp/JobsIT.java | 1 - .../java/com/example/dlp/RiskAnalysisIT.java | 19 +++++------ 6 files changed, 52 insertions(+), 44 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 8f7c5367d95..78097713e24 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -54,6 +54,7 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -310,31 +311,31 @@ private static void inspectGcsFile( // [START wait_on_dlp_job_completion] // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber - private static void waitOnJobCompletion( - String projectId, String subscriptionId, String dlpJobName) - throws InterruptedException, ExecutionException { + private static void waitOnJobCompletion ( + String projectId, String subscriptionId, String dlpJobName) throws Exception{ // wait for job 
completion final SettableApiFuture done = SettableApiFuture.create(); // setup a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), + ProjectSubscriptionName.of(projectId, subscriptionId), (pubsubMessage, ackReplyConsumer) -> { - ackReplyConsumer.ack(); if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { // notify job completion done.set(true); + ackReplyConsumer.ack(); } }) .build(); - + subscriber.startAsync(); // wait for job completion - done.get(); + try{ + done.get(30, TimeUnit.SECONDS); + } catch (Exception e){ + System.out.println("Unable to verify job complete."); + } } // [END wait_on_dlp_job_completion] @@ -509,7 +510,7 @@ private static void inspectBigquery( System.out.println("Job created with ID:" + dlpJob.getName()); // wait on completion - waitOnJobCompletion(dlpJob.getName(), projectId, subscriptionId); + waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); DlpJob completedJob = dlpServiceClient.getDlpJob( diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 27328c135b3..c59dfbd80e6 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -49,6 +49,8 @@ import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -141,7 +143,7 @@ private static void calculateNumericalStats( int percent = 1; for (Value quantileValue : result.getQuantileValuesList()) { System.out.printf( - "Value at %d \\% quantile : %.3f", percent, 
quantileValue.getFloatValue()); + "Value at %s %% quantile : %.3f", percent, quantileValue.getFloatValue()); } } } @@ -150,7 +152,7 @@ private static void calculateNumericalStats( // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber private static void waitOnJobCompletion( String projectId, String subscriptionId, String dlpJobName) - throws InterruptedException, ExecutionException { + throws Exception { // wait for job completion final SettableApiFuture done = SettableApiFuture.create(); @@ -162,17 +164,21 @@ private static void waitOnJobCompletion( .setSubscription(subscriptionId) .build(), (pubsubMessage, ackReplyConsumer) -> { - ackReplyConsumer.ack(); if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { // notify job completion done.set(true); + ackReplyConsumer.ack(); } }) .build(); - + subscriber.startAsync(); // wait for job completion - done.get(); + try{ + done.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + System.out.println("Unable to verify job complete."); + } } // [END wait_on_dlp_job_completion] @@ -182,8 +188,7 @@ private static void calculateCategoricalStats( String tableId, String columnName, String topicId, - String subscriptionId) - throws Exception { + String subscriptionId){ // [START dlp_categorical_stats] /** * Calculate categorical statistics for a column in a BigQuery table using the DLP API. 
diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 68c56f3f39d..3f163e42a29 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -47,7 +47,7 @@ public class DeIdentificationIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + System.setOut(out); assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 38cb33db3fb..9aebae7d8d8 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -16,7 +16,9 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -50,7 +52,6 @@ public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); System.setOut(out); - // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } @@ -63,8 +64,9 @@ public void testStringInspectionReturnsInfoTypes() throws Exception { "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } @Test @@ -74,8 +76,8 @@ public void testTextFileInspectionReturnsInfoTypes() throws Exception { 
"-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } @Test @@ -85,13 +87,13 @@ public void testImageFileInspectionReturnsInfoTypes() throws Exception { "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } // Requires that bucket by the specified name exists @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testGcsFileInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-gcs", @@ -102,14 +104,14 @@ public void testGcsFileInspectionReturnsInfoTypes() throws Exception { "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } // Requires a Datastore kind containing an entity // with phone number and email address properties. 
@Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testDatastoreInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-ds", @@ -119,12 +121,12 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testBigqueryInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-bq", @@ -135,7 +137,7 @@ public void testBigqueryInspectionReturnsInfoTypes() throws Exception { "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); + assertThat(output, containsString("PHONE_NUMBER")); } @After diff --git a/dlp/src/test/java/com/example/dlp/JobsIT.java b/dlp/src/test/java/com/example/dlp/JobsIT.java index e13304f1c8b..14c336c2543 100644 --- a/dlp/src/test/java/com/example/dlp/JobsIT.java +++ b/dlp/src/test/java/com/example/dlp/JobsIT.java @@ -50,7 +50,6 @@ public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); System.setOut(out); - // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 5e164e3fe1a..e15675eef60 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -16,7 +16,9 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static 
org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -44,13 +46,14 @@ public class RiskAnalysisIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); + System.setOut(out); assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); } @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testNumericalStats() throws Exception { RiskAnalysis.main( new String[] { @@ -61,14 +64,11 @@ public void testNumericalStats() throws Exception { "-subscriptionId", subscriptionId }); String output = bout.toString(); - assertTrue( - Pattern.compile("Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); - assertTrue( - Pattern.compile("Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); + assertThat(output, containsString("Value at ")); } @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testCategoricalStats() throws Exception { RiskAnalysis.main( new String[] { @@ -80,11 +80,12 @@ public void testCategoricalStats() throws Exception { "-subscriptionId", subscriptionId }); String output = bout.toString(); - assertTrue(Pattern.compile("Most common value occurs \\d time\\(s\\)").matcher(output).find()); + + assertThat(output, containsString("Most common value occurs")); } @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testKAnonymity() throws Exception { RiskAnalysis.main(new String[]{ "-k", @@ -101,7 +102,7 @@ public void testKAnonymity() throws Exception { } @Test - @Ignore // TODO: Fix Pubsub + @Ignore // Pubsub tests are flakey when run consecutively public void testLDiversity() throws Exception { RiskAnalysis.main( new 
String[] { From 70ca0f7af0f7202003ee64263e27f16db7f06d20 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Date: Sat, 17 Mar 2018 11:01:51 -0700 Subject: [PATCH 15/23] Update PR tests to complete all tests before returning results. (#1065) * Return results of all tests. * Use for loop instead of while. --- .kokoro/tests/diff_tests.sh | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.kokoro/tests/diff_tests.sh b/.kokoro/tests/diff_tests.sh index a300587c78e..0aeced3e33b 100755 --- a/.kokoro/tests/diff_tests.sh +++ b/.kokoro/tests/diff_tests.sh @@ -38,21 +38,21 @@ gcloud auth activate-service-account\ --project=$GOOGLE_CLOUD_PROJECT echo -e "\n******************** TESTING AFFECTED PROJECTS ********************" +set +e # Diff to find out what has changed from master +RESULT=0 cd github/java-docs-samples -find * -name pom.xml -print0 | sort -z | while read -d $'\0' file -do +# For every pom.xml (may break on whitespace) +for file in **/pom.xml; do # Navigate to project file=$(dirname "$file") pushd "$file" > /dev/null - set +e # Only tests changed projects git diff --quiet master.. . CHANGED=$? # Only test leafs to prevent testing twice PARENT=$(grep "" pom.xml -c) - set -e # Check for changes to the current folder if [ "$CHANGED" -eq 1 ] && [ "$PARENT" -eq 0 ]; then @@ -60,15 +60,25 @@ do echo "- testing $file" echo "------------------------------------------------------------" + # Run tests and update RESULT if failed mvn -q --batch-mode --fail-at-end clean verify \ -Dfile.encoding="UTF-8" \ -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ -Dmaven.test.redirectTestOutputToFile=true \ -Dbigtable.projectID="${GOOGLE_CLOUD_PROJECT}" \ -Dbigtable.instanceID=instance - echo -e "\n Tests complete. \n" + EXIT=$? + + if [ $EXIT -ne 0 ]; then + echo -e "\n Tests failed. \n" + RESULT=1 + else + echo -e "\n Tests complete. 
\n" + fi fi popd > /dev/null done + +exit $RESULT From abd103746cf98be78123f45701b0e1fd6ef12c80 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 14:59:32 -0700 Subject: [PATCH 16/23] WIP: Address PR feedback, part 1 --- .../com/example/dlp/DeIdentification.java | 42 ++--------- .../main/java/com/example/dlp/Inspect.java | 16 ++-- dlp/src/main/java/com/example/dlp/Jobs.java | 18 ++--- .../main/java/com/example/dlp/Metadata.java | 14 ++-- .../main/java/com/example/dlp/QuickStart.java | 6 +- dlp/src/main/java/com/example/dlp/Redact.java | 27 ++++--- .../java/com/example/dlp/RiskAnalysis.java | 74 +++++++++---------- .../main/java/com/example/dlp/Templates.java | 28 +++---- .../main/java/com/example/dlp/Triggers.java | 34 ++++----- .../java/com/example/dlp/QuickStartIT.java | 2 +- .../java/com/example/dlp/TemplatesIT.java | 6 +- .../test/java/com/example/dlp/TriggersIT.java | 6 +- 12 files changed, 120 insertions(+), 153 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 20444d3f9f5..855842d417d 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -66,10 +66,9 @@ public class DeIdentification { + // [START dlp_deidentify_masking] /** - * [START dlp_deidentify_mask] - * - *

Deidentify a string by masking sensitive information with a character using the DLP API. + * Deidentify a string by masking sensitive information with a character using the DLP API. * * @param string The string to deidentify. * @param maskingCharacter (Optional) The character to mask sensitive data with. @@ -79,21 +78,10 @@ public class DeIdentification { */ private static void deIdentifyWithMask( String string, Character maskingCharacter, int numberToMask, String projectId) { - // [START dlp_deidentify_masking] - /** - * Deidentify a string by masking sensitive information with a character using the DLP API. - * @param string The string to deidentify. - * @param maskingCharacter (Optional) The character to mask sensitive data with. - * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. - * Omitting this value or setting it to 0 masks all sensitive chars. - */ // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // string = "My SSN is 372819127"; - // numberToMask = 5; - // maskingCharacter = 'x'; ByteContentItem byteContentItem = ByteContentItem.newBuilder() .setType(ByteContentItem.BytesType.TEXT_UTF8) @@ -151,10 +139,9 @@ private static void deIdentifyWithMask( } // [END dlp_deidentify_mask] + // [START dlp_deidentify_fpe] /** - * [START dlp_deidentify_fpe] - * - *

Deidentify a string by encrypting sensitive information while preserving format. + * Deidentify a string by encrypting sensitive information while preserving format. * * @param string The string to deidentify. * @param alphabet The set of characters to use when encrypting the input. For more information, @@ -172,11 +159,6 @@ private static void deIdentifyWithFpe( // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // string = "My SSN is 372819127"; - // alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; - // keyName = "projects/GCP_PROJECT/locations/REGION/keyRings/KEYRING_ID/cryptoKeys/KEY_NAME"; - // wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" - ByteContentItem byteContentItem = ByteContentItem.newBuilder() .setType(ByteContentItem.BytesType.TEXT_UTF8) @@ -277,17 +259,7 @@ private static void deidentifyWithDateShift( .setLowerBoundDays(lowerBoundDays) .setUpperBoundDays(upperBoundDays); - // (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key - // If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set - // String keyName = - // 'projects/PROJECT/locations/LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; - - // (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates - // This key should be encrypted using the Cloud KMS key specified above - // If this is specified, then 'keyName' and 'contextFieldId' must also be set - // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' - - // If contextFieldId , keyName or wrappedKey is set : all three arguments must be valid + // If contextFieldId, keyName or wrappedKey is set: all three arguments must be valid if (contextFieldId != null && keyName != null && wrappedKey != null) { dateShiftConfigBuilder.setContext(FieldId.newBuilder().setName(contextFieldId).build()); KmsWrappedCryptoKey kmsWrappedCryptoKey = @@ -304,8 +276,6 @@ private static void deidentifyWithDateShift( } // Read and parse the CSV 
file - // The first row of the file must specify column names, and all other rows - // Path inputCsvFile = Paths.get("/path/to/file.csv"); BufferedReader br = null; String line; List rows = new ArrayList<>(); @@ -443,7 +413,7 @@ public static void main(String[] args) throws Exception { optionsGroup.addOption(deidentifyMaskingOption); Option deidentifyFpeOption = - new Option("f", "fpe", true, "Deidentify with FFX FPE."); + new Option("f", "fpe", true, "Deidentify with format-preserving encryption."); optionsGroup.addOption(deidentifyFpeOption); Option deidentifyDateShiftOption = diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 78097713e24..6bfb13f0cc9 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -75,6 +75,7 @@ public class Inspect { * @param maxFindings The maximum number of findings to report (0 = server maximum) * @param infoTypes The infoTypes of information to match * @param includeQuote Whether to include the matching string + * @param projectId Google Cloud project ID */ private static void inspectString( String string, @@ -95,8 +96,6 @@ private static void inspectString( .setIncludeQuote(includeQuote) .build(); - // The string to inspect - // string = 'My name is Gary and my email is gary@example.com'; ByteContentItem byteContentItem = ByteContentItem.newBuilder() .setType(ByteContentItem.BytesType.TEXT_UTF8) @@ -117,7 +116,7 @@ private static void inspectString( System.out.println("Findings: "); for (Finding finding : response.getResult().getFindingsList()) { if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); + System.out.print("\tQuote: " + finding.getQuote()); } System.out.print("\tInfo type: " + finding.getInfoType().getName()); System.out.println("\tLikelihood: " + finding.getLikelihood()); @@ -139,6 +138,7 @@ private static void inspectString( * @param maxFindings The maximum number of findings to 
report (0 = server maximum) * @param infoTypes The infoTypes of information to match * @param includeQuote Whether to include the matching string + * @param projectId Google Cloud project ID */ private static void inspectFile( String filePath, @@ -155,7 +155,7 @@ private static void inspectFile( mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.TEXT_UTF8; + ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; if (mimeType.equals("image/jpeg")) { bytesType = ByteContentItem.BytesType.IMAGE_JPEG; @@ -199,7 +199,7 @@ private static void inspectFile( System.out.println("Findings: "); for (Finding finding : result.getFindingsList()) { if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); + System.out.print("\tQuote: " + finding.getQuote()); } System.out.print("\tInfo type: " + finding.getInfoType().getName()); System.out.println("\tLikelihood: " + finding.getLikelihood()); @@ -215,9 +215,9 @@ private static void inspectFile( // [END dlp_inspect_file] /** - * [START inspect_gcs_file] + * [START dlp_inspect_gcs] * - *

Inspect GCS file for Info types and wait on job completion using Google Cloud Pub/Sub + * Inspect GCS file for Info types and wait on job completion using Google Cloud Pub/Sub * notification * * @param bucketName The name of the bucket where the file resides. @@ -334,7 +334,7 @@ private static void waitOnJobCompletion ( try{ done.get(30, TimeUnit.SECONDS); } catch (Exception e){ - System.out.println("Unable to verify job complete."); + System.out.println("Unable to verify job completion."); } } // [END wait_on_dlp_job_completion] diff --git a/dlp/src/main/java/com/example/dlp/Jobs.java b/dlp/src/main/java/com/example/dlp/Jobs.java index c039581015c..43ed11dd4ab 100644 --- a/dlp/src/main/java/com/example/dlp/Jobs.java +++ b/dlp/src/main/java/com/example/dlp/Jobs.java @@ -36,17 +36,16 @@ public class Jobs { // [START dlp_list_jobs] + /* + * List DLP jobs + * + * @param projectId The project ID to run the API call under + * @param filter The filter expression to use, eg. state=DONE For more information on filter + * syntax see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list + * @param jobType The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') + */ private static void listJobs(String projectId, String filter, DlpJobType jobType) throws Exception { - /** - * - * List DLP jobs - * - * @param projectId The project ID to run the API call under - * @param filter The filter expression to use, eg. 
state=DONE For more information on filter - * syntax see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list - * @param jobType The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') - */ try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ListDlpJobsRequest listDlpJobsRequest = ListDlpJobsRequest.newBuilder() @@ -64,7 +63,6 @@ private static void listJobs(String projectId, String filter, DlpJobType jobType // [END dlp_list_jobs] /** - * * Delete a DLP Job * * @param projectId Google Cloud ProjectID diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java b/dlp/src/main/java/com/example/dlp/Metadata.java index 154aeae6ba1..57cabfa874c 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -31,13 +31,17 @@ public class Metadata { + // [START dlp_list_info_types] + /* + * List the types of sensitive information the DLP API supports. + * + * @param filter The filter to use, e.g. "supported_by=INSPECT" + * @param languageCode The BCP-47 language code to use, e.g. 'en-US' + */ private static void listInfoTypes(String filter, String languageCode) throws Exception { - // [START dlp_list_info_types] + // Instantiate a DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The category of info types to list, e.g. category = 'GOVERNMENT'; - // Optional BCP-47 language code for localized info type friendly names, e.g. 'en-US' - // filter supported_by=INSPECT ListInfoTypesRequest listInfoTypesRequest = ListInfoTypesRequest.newBuilder() .setFilter(filter) .setLanguageCode(languageCode) @@ -49,8 +53,8 @@ private static void listInfoTypes(String filter, String languageCode) throws Exc System.out.println("Display name : " + infoTypeDescription.getDisplayName()); } } - // [END dlp_list_info_types] } + // [END dlp_list_info_types] /** Retrieve infoTypes. 
*/ public static void main(String[] args) throws Exception { diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java b/dlp/src/main/java/com/example/dlp/QuickStart.java index 3ead1b1b4d6..d8cd7442d5d 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -51,8 +51,8 @@ public static void main(String[] args) throws Exception { // The infoTypes of information to match List infoTypes = Arrays.asList( - InfoType.newBuilder().setName("US_MALE_NAME").build(), - InfoType.newBuilder().setName("US_FEMALE_NAME").build()); + InfoType.newBuilder().setName("PERSON_NAME").build(), + InfoType.newBuilder().setName("US_STATE").build()); // Whether to include the matching string boolean includeQuote = true; @@ -96,7 +96,7 @@ public static void main(String[] args) throws Exception { System.out.println("Findings: "); for (Finding finding : result.getFindingsList()) { if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); + System.out.print("\tQuote: " + finding.getQuote()); } System.out.print("\tInfo type: " + finding.getInfoType().getName()); System.out.println("\tLikelihood: " + finding.getLikelihood()); diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index 9e32349be7a..c01abe39be1 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -44,6 +44,16 @@ public class Redact { + // [START dlp_redact_image] + /* + * Redact sensitive data from an image using the Data Loss Prevention API. + * + * @param filePath The path to a local file to inspect. Can be a JPG or PNG image file. + * @param minLikelihood The minimum likelihood required before redacting a match. + * @param infoTypes The infoTypes of information to redact. + * @param outputPath The local path to save the resulting image to. + * @param projectId The project ID to run the API call under. 
+ */ private static void redactImage( String filePath, Likelihood minLikelihood, @@ -51,11 +61,9 @@ private static void redactImage( String outputPath, String projectId) throws Exception { - // [START dlp_redact_image] + // Instantiate the DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The path to a local file to inspect. Can be a JPG or PNG image file. - // filePath = 'path/to/image.png' String mimeType = URLConnection.guessContentTypeFromName(filePath); if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); @@ -71,17 +79,8 @@ private static void redactImage( } else if (mimeType.equals("image/svg")) { bytesType = ByteContentItem.BytesType.IMAGE_SVG; } - - byte[] data = Files.readAllBytes(Paths.get(filePath)); - - // The minimum likelihood required before redacting a match - // minLikelihood = 'LIKELIHOOD_UNSPECIFIED' - // The infoTypes of information to redact - // infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }] - - // The local path to save the resulting image to. - // outputPath = 'result.png' + byte[] data = Files.readAllBytes(Paths.get(filePath)); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -121,8 +120,8 @@ private static void redactImage( outputStream.write(redactedImageData.toByteArray()); outputStream.close(); } - // [END dlp_redact_image] } + // [END dlp_redact_image] /** Command line application to redact strings, images using the Data Loss Prevention API. 
*/ public static void main(String[] args) throws Exception { diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index c59dfbd80e6..b320a6d7636 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -63,6 +63,18 @@ public class RiskAnalysis { + // [START dlp_numerical_stats] + /** + * Calculate numerical statistics for a column in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze, which must contain only numerical data. + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ private static void calculateNumericalStats( String projectId, String datasetId, @@ -71,18 +83,6 @@ private static void calculateNumericalStats( String topicId, String subscriptionId) throws Exception { - // [START dlp_numerical_stats] - /** - * Calculate numerical statistics for a column in a BigQuery table using the DLP API. - * - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze, which must contain only numerical data. - * @param topicId The name of the Pub/Sub topic to notify once the job completes - * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job - * completion status. 
- */ // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -105,7 +105,7 @@ private static void calculateNumericalStats( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create /action to publish job status notifications over Google Cloud Pub/Sub + // create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -147,6 +147,7 @@ private static void calculateNumericalStats( } } } + // [END dlp_numerical_stats] // [START wait_on_dlp_job_completion] // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber @@ -182,6 +183,18 @@ private static void waitOnJobCompletion( } // [END wait_on_dlp_job_completion] + // [START dlp_categorical_stats] + /** + * Calculate categorical statistics for a column in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze, which need not contain numerical data. + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ private static void calculateCategoricalStats( String projectId, String datasetId, @@ -189,24 +202,10 @@ private static void calculateCategoricalStats( String columnName, String topicId, String subscriptionId){ - // [START dlp_categorical_stats] - /** - * Calculate categorical statistics for a column in a BigQuery table using the DLP API. - * - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. 
- * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze, which need not contain numerical data. - */ // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // columnName = "firstName"; - FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); CategoricalStatsConfig categoricalStatsConfig = @@ -228,7 +227,7 @@ private static void calculateCategoricalStats( .setTopic(topicName.toString()) .build(); - // create /action to publish job status notifications over Google Cloud Pub/Sub + // create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -278,7 +277,7 @@ private static void calculateCategoricalStats( System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage()); } } - // [END dlp_categorical_stats_analysis] + // [END dlp_categorical_stats] // [START dlp_k_anonymity] /** @@ -288,6 +287,9 @@ private static void calculateCategoricalStats( * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. 
*/ private static void calculateKAnonymity( String projectId, @@ -300,11 +302,6 @@ private static void calculateKAnonymity( // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = 'my_dataset'; - // tableId = 'my_table'; - // quasiIds = [{ columnName: 'age' }, { columnName: 'city' }]; - List quasiIdFields = quasiIds .stream() @@ -328,7 +325,7 @@ private static void calculateKAnonymity( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create /action to publish job status notifications over Google Cloud Pub/Sub + // create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -388,13 +385,16 @@ private static void calculateKAnonymity( /** * [START dlp_l_diversity] * - *

Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. + * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. * * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. * @param tableId The BigQuery table to analyze. * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. */ private static void calculateLDiversity( String projectId, @@ -437,7 +437,7 @@ private static void calculateLDiversity( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create /action to publish job status notifications over Google Cloud Pub/Sub + // create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java index e697104a651..91bbab558ae 100644 --- a/dlp/src/main/java/com/example/dlp/Templates.java +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -44,7 +44,7 @@ public class Templates { /** - * [START dlp_create_template] + * [START dlp_create_inspect_template] * * @param displayName (Optional) The human-readable name to give the template * @param projectId Google Cloud Project ID to call the API under @@ -95,12 +95,11 @@ private static void createInspectTemplate( System.out.printf("Error creating template: %s", e.getMessage()); } } - // [END dlp_create_template] + // [END dlp_create_inspect_template] + // [START 
dlp_list_inspect_templates] /** - * [START dlp_list_templates] - * - *

List DLP inspection templates created in a given project + * List DLP inspection templates created in a given project * * @param projectId Google Cloud Project ID */ @@ -120,27 +119,28 @@ private static void listInspectTemplates(String projectId) { for (InspectTemplate template : templatesResponse.getInspectTemplatesList()) { System.out.printf("Template name: %s\n", template.getName()); if (template.getDisplayName() != null) { - System.out.printf("Template display name: %s \n", template.getDisplayName()); - System.out.printf("Template create time: %s \n", template.getCreateTime()); - System.out.printf("Template update time: %s \n", template.getUpdateTime()); + System.out.printf("\tDisplay name: %s \n", template.getDisplayName()); + System.out.printf("\tCreate time: %s \n", template.getCreateTime()); + System.out.printf("\tUpdate time: %s \n", template.getUpdateTime()); // print inspection config InspectConfig inspectConfig = template.getInspectConfig(); for (InfoType infoType : inspectConfig.getInfoTypesList()) { - System.out.printf("InfoType: %s\n", infoType.getName()); + System.out.printf("\tInfoType: %s\n", infoType.getName()); } - System.out.printf("Min likelihood: %s\n", inspectConfig.getMinLikelihood()); - System.out.printf("Limits: %s\n", inspectConfig.getLimits().getMaxFindingsPerRequest()); + System.out.printf("\tMin likelihood: %s\n", inspectConfig.getMinLikelihood()); + System.out.printf("\tLimits: %s\n", inspectConfig.getLimits().getMaxFindingsPerRequest()); } } } catch (Exception e) { System.out.printf("Error creating template: %s", e.getMessage()); } } - // [END dlp_list_templates] + // [END dlp_list_inspect_templates] + // [START dlp_delete_inspect_template] /** - * [START dlp_delete_template] + * Delete the DLP inspection configuration template with the specified name. 
* * @param projectId Google Cloud Project ID * @param templateId Template ID to be deleted @@ -161,7 +161,7 @@ private static void deleteInspectTemplate(String projectId, String templateId) { System.err.printf("Error deleting template: %s\n", templateName); } } - // [END dlp_delete_template] + // [END dlp_delete_inspect_template] /** Command line application to create, list and delete DLP inspect templates. */ public static void main(String[] args) throws Exception { diff --git a/dlp/src/main/java/com/example/dlp/Triggers.java b/dlp/src/main/java/com/example/dlp/Triggers.java index d766be73c0f..107324602cb 100644 --- a/dlp/src/main/java/com/example/dlp/Triggers.java +++ b/dlp/src/main/java/com/example/dlp/Triggers.java @@ -45,10 +45,9 @@ public class Triggers { + // [START dlp_create_trigger] /** - * [START dlp_create_trigger] - * - *

Schedule a DLP inspection trigger for a GCS location. + * Schedule a DLP inspection trigger for a GCS location. * * @param triggerId (Optional) name of the trigger to be created * @param displayName (Optional) display name for the trigger to be created @@ -101,7 +100,7 @@ private static void createTrigger( .setStorageConfig(storageConfig) .build(); - // Schedule scan of GCS bucket every scanPeriod number of days + // Schedule scan of GCS bucket every scanPeriod number of days (minimum = 1 day) Duration duration = Duration.newBuilder().setSeconds(scanPeriod * 24 * 3600).build(); Schedule schedule = Schedule.newBuilder().setRecurrencePeriodDuration(duration).build(); JobTrigger.Trigger trigger = JobTrigger.Trigger.newBuilder().setSchedule(schedule).build(); @@ -115,7 +114,6 @@ private static void createTrigger( .addTriggers(trigger) .build(); - System.out.println("Pause"); // Create scan request CreateJobTriggerRequest createJobTriggerRequest = CreateJobTriggerRequest.newBuilder() @@ -127,13 +125,14 @@ private static void createTrigger( System.out.println("Created Trigger: " + createdJobTrigger.getName()); } catch (Exception e) { - System.out.println("Error creating trigger :" + e.getMessage()); + System.out.println("Error creating trigger: " + e.getMessage()); } } // [END dlp_create_trigger] + // [START dlp_list_triggers] /** - * [START dlp_list_triggers] List all DLP triggers for a given project. + * List all DLP triggers for a given project. * * @param projectId The project ID to run the API call under. 
*/ @@ -151,34 +150,32 @@ private static void listTriggers(String projectId) { .forEach( trigger -> { System.out.println("Trigger: " + trigger.getName()); - System.out.println("Created: " + trigger.getCreateTime()); - System.out.println("Updated: " + trigger.getUpdateTime()); + System.out.println("\tCreated: " + trigger.getCreateTime()); + System.out.println("\tUpdated: " + trigger.getUpdateTime()); if (trigger.getDisplayName() != null) { - System.out.println("Display name: " + trigger.getDisplayName()); + System.out.println("\tDisplay name: " + trigger.getDisplayName()); } if (trigger.getDescription() != null) { - System.out.println("Description: " + trigger.getDescription()); + System.out.println("\tDescription: " + trigger.getDescription()); } - System.out.println("Status: " + trigger.getStatus()); - System.out.println("Error count: " + trigger.getErrorsCount()); + System.out.println("\tStatus: " + trigger.getStatus()); + System.out.println("\tError count: " + trigger.getErrorsCount()); }); } catch (Exception e) { System.out.println("Error listing triggers :" + e.getMessage()); } } - // [END dlp_list_trigger] + // [END dlp_list_triggers] + // [START dlp_delete_trigger] /** - * [START dlp_delete_trigger] * - *

Delete a DLP trigger in a project. + * Delete a DLP trigger in a project. * * @param projectId The project ID to run the API call under. * @param triggerId Trigger ID */ private static void deleteTrigger(String projectId, String triggerId) { - // Instantiates a client - // triggerName to provided as projects/project-id/jobTriggers/triggerId ProjectJobTriggerName triggerName = ProjectJobTriggerName.of(projectId, triggerId); try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -191,7 +188,6 @@ private static void deleteTrigger(String projectId, String triggerId) { System.out.println("Error deleting trigger :" + e.getMessage()); } } - // [END dlp_delete_trigger] /** Command line application to crate, list and delete triggers. */ diff --git a/dlp/src/test/java/com/example/dlp/QuickStartIT.java b/dlp/src/test/java/com/example/dlp/QuickStartIT.java index 5c22c64a781..d62726ea2db 100644 --- a/dlp/src/test/java/com/example/dlp/QuickStartIT.java +++ b/dlp/src/test/java/com/example/dlp/QuickStartIT.java @@ -49,7 +49,7 @@ public void testQuickStart() throws Exception { QuickStart.main(new String[] {}); String output = bout.toString(); - assertThat(output, containsString("US_MALE_NAME")); + assertThat(output, containsString("PERSON_NAME")); } @After diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java index 80627826727..7c8b921b294 100644 --- a/dlp/src/test/java/com/example/dlp/TemplatesIT.java +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -58,9 +58,9 @@ public void tearDown() { public void testCreateInspectTemplate() throws Exception { Templates.main(new String[] { "-c", - "-displayName", String.format("test-name-%s", new Date()), - "-templateId", String.format("template%s", System.currentTimeMillis()), - "-description", String.format("description-%s", new Date()) + "-displayName", String.format("test-name-%s", UUID.randomUUID()), + "-templateId", String.format("template%s", 
UUID.randomUUID()), + "-description", String.format("description-%s", UUID.randomUUID()) }); String output = bout.toString(); assertThat(output, containsString("Template created: ")); diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java index 27560361197..352706dcff7 100644 --- a/dlp/src/test/java/com/example/dlp/TriggersIT.java +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -63,9 +63,9 @@ public void tearDown() { public void testCreateTrigger() throws Exception { Triggers.main(new String[] { "-c", - "-displayName", String.format("trigger-name-%s", new Date()), - "-triggerId", String.format("trigger%s", System.currentTimeMillis()), - "-description", String.format("description-%s", new Date()), + "-displayName", String.format("trigger-name-%s", UUID.randomUUID()), + "-triggerId", String.format("trigger%s", UUID.randomUUID()), + "-description", String.format("description-%s", UUID.randomUUID()), "-bucketName", bucketName, "-fileName", "test.txt", "-scanPeriod", "1" From d11d705d346be5ca93f687fbdc67e31bffb5d2db Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 15:23:17 -0700 Subject: [PATCH 17/23] Update deps --- dlp/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlp/pom.xml b/dlp/pom.xml index 0e9f1aee795..d1adb7f88d5 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -43,12 +43,12 @@ com.google.cloud google-cloud-dlp - 0.38.1-beta-SNAPSHOT + 0.40.0-beta com.google.cloud google-cloud-pubsub - 0.38.0-beta + 0.40.0-beta commons-cli From eb0e6d2e57cadee588649dd5f19f68c74dbfc231 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 17:33:22 -0700 Subject: [PATCH 18/23] Address PR feedback --- .../com/example/dlp/DeIdentification.java | 4 +- .../main/java/com/example/dlp/Inspect.java | 159 +++++++++----- .../main/java/com/example/dlp/QuickStart.java | 4 +- dlp/src/main/java/com/example/dlp/Redact.java | 28 ++- 
.../java/com/example/dlp/RiskAnalysis.java | 203 ++++++++++++------ .../com/example/dlp/DeIdentificationIT.java | 25 ++- .../test/java/com/example/dlp/InspectIT.java | 3 - .../java/com/example/dlp/RiskAnalysisIT.java | 7 +- .../java/com/example/dlp/TemplatesIT.java | 4 +- .../test/java/com/example/dlp/TriggersIT.java | 2 +- dlp/src/test/resources/results.correct.csv | 5 + 11 files changed, 288 insertions(+), 156 deletions(-) create mode 100644 dlp/src/test/resources/results.correct.csv diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 855842d417d..4d11cfd37bc 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -161,7 +161,6 @@ private static void deIdentifyWithFpe( ByteContentItem byteContentItem = ByteContentItem.newBuilder() - .setType(ByteContentItem.BytesType.TEXT_UTF8) .setData(ByteString.copyFrom(string, StandardCharsets.UTF_8)) .build(); @@ -224,8 +223,8 @@ private static void deIdentifyWithFpe( } // [END dlp_deidentify_fpe] + // [START dlp_deidentify_date_shift] /** - * [START dlp_deidentify_date_shift] * * @param inputCsvPath The path to the CSV file to deidentify * @param outputCsvPath (Optional) path to the output CSV file @@ -340,7 +339,6 @@ private static void deidentifyWithDateShift( File outputFile = outputCsvPath.toFile(); if (!outputFile.exists()) { - outputFile.getParentFile().mkdirs(); outputFile.createNewFile(); } BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputFile)); diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 6bfb13f0cc9..476404606a9 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -34,6 +34,7 @@ import com.google.privacy.dlp.v2.InfoType; import com.google.privacy.dlp.v2.InfoTypeStats; import 
com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectConfig.FindingLimits; import com.google.privacy.dlp.v2.InspectContentRequest; import com.google.privacy.dlp.v2.InspectContentResponse; import com.google.privacy.dlp.v2.InspectDataSourceDetails; @@ -65,6 +66,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; + public class Inspect { /** @@ -86,8 +88,8 @@ private static void inspectString( String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) @@ -155,16 +157,23 @@ private static void inspectFile( mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; - - if (mimeType.equals("image/jpeg")) { - bytesType = ByteContentItem.BytesType.IMAGE_JPEG; - } else if (mimeType.equals("image/bmp")) { - bytesType = ByteContentItem.BytesType.IMAGE_BMP; - } else if (mimeType.equals("image/png")) { - bytesType = ByteContentItem.BytesType.IMAGE_PNG; - } else if (mimeType.equals("image/svg")) { - bytesType = ByteContentItem.BytesType.IMAGE_SVG; + ByteContentItem.BytesType bytesType; + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = ByteContentItem.BytesType.IMAGE_SVG; + break; + default: + bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; + break; } byte[] data = 
Files.readAllBytes(Paths.get(filePath)); @@ -174,8 +183,8 @@ private static void inspectFile( .build(); ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); - InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -253,8 +262,8 @@ private static void inspectGcsFile( StorageConfig storageConfig = StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); - InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -287,7 +296,30 @@ private static void inspectGcsFile( System.out.println("Job created with ID:" + dlpJob.getName()); - waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (Exception e){ + System.out.println("Unable to verify job completion."); + } DlpJob completedJob = dlpServiceClient.getDlpJob( @@ -309,36 +341,6 @@ private static void inspectGcsFile( // [END dlp_inspect_gcs] } - // 
[START wait_on_dlp_job_completion] - // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber - private static void waitOnJobCompletion ( - String projectId, String subscriptionId, String dlpJobName) throws Exception{ - // wait for job completion - final SettableApiFuture done = SettableApiFuture.create(); - - // setup a Pub/Sub subscriber to listen on the job completion status - Subscriber subscriber = - Subscriber.newBuilder( - ProjectSubscriptionName.of(projectId, subscriptionId), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) - .build(); - subscriber.startAsync(); - // wait for job completion - try{ - done.get(30, TimeUnit.SECONDS); - } catch (Exception e){ - System.out.println("Unable to verify job completion."); - } - } - // [END wait_on_dlp_job_completion] - // [START dlp_inspect_datastore] /** * Inspect a Datastore kind @@ -378,8 +380,8 @@ private static void inspectDatastore( StorageConfig storageConfig = StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build(); - InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -411,8 +413,32 @@ private static void inspectDatastore( DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); System.out.println("Job created with ID:" + dlpJob.getName()); - // asynchronously submit an inspect job, and wait on results - waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); + + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber 
subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (Exception e){ + System.out.println("Unable to verify job completion."); + } + DlpJob completedJob = dlpServiceClient.getDlpJob( @@ -473,8 +499,8 @@ private static void inspectBigquery( StorageConfig storageConfig = StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); - InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() @@ -509,8 +535,31 @@ private static void inspectBigquery( System.out.println("Job created with ID:" + dlpJob.getName()); - // wait on completion - waitOnJobCompletion(projectId, subscriptionId, dlpJob.getName()); + // wait on job completion + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (Exception e){ + System.out.println("Unable to verify job completion."); + } + 
DlpJob completedJob = dlpServiceClient.getDlpJob( diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java b/dlp/src/main/java/com/example/dlp/QuickStart.java index d8cd7442d5d..2728e4674cb 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -39,11 +39,11 @@ public class QuickStart { public static void main(String[] args) throws Exception { // string to inspect - String text = "Robert Frost"; + String text = "His name was Robert Frost"; // The minimum likelihood required before returning a match: // LIKELIHOOD_UNSPECIFIED, VERY_UNLIKELY, UNLIKELY, POSSIBLE, LIKELY, VERY_LIKELY, UNRECOGNIZED - Likelihood minLikelihood = Likelihood.VERY_LIKELY; + Likelihood minLikelihood = Likelihood.POSSIBLE; // The maximum number of findings to report (0 = server maximum) int maxFindings = 0; diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index c01abe39be1..1ffd49fc90c 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -68,16 +68,24 @@ private static void redactImage( if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - ByteContentItem.BytesType bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; - - if (mimeType.equals("image/jpeg")) { - bytesType = ByteContentItem.BytesType.IMAGE_JPEG; - } else if (mimeType.equals("image/bmp")) { - bytesType = ByteContentItem.BytesType.IMAGE_BMP; - } else if (mimeType.equals("image/png")) { - bytesType = ByteContentItem.BytesType.IMAGE_PNG; - } else if (mimeType.equals("image/svg")) { - bytesType = ByteContentItem.BytesType.IMAGE_SVG; + + ByteContentItem.BytesType bytesType; + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + 
bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = ByteContentItem.BytesType.IMAGE_SVG; + break; + default: + bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; + break; } byte[] data = Files.readAllBytes(Paths.get(filePath)); diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index b320a6d7636..d3ea4a13fe5 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -48,7 +48,6 @@ import com.google.pubsub.v1.ProjectTopicName; import java.util.Arrays; import java.util.List; -import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; @@ -75,7 +74,7 @@ public class RiskAnalysis { * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job * completion status. */ - private static void calculateNumericalStats( + private static void numericalStatsAnalysis( String projectId, String datasetId, String tableId, @@ -124,8 +123,33 @@ private static void calculateNumericalStats( DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); String dlpJobName = dlpJob.getName(); - // wait on job completion - waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job 
completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } // retrieve completed job status DlpJob completedJob = @@ -141,48 +165,19 @@ private static void calculateNumericalStats( result.getMinValue().getFloatValue(), result.getMaxValue().getFloatValue()); int percent = 1; + Double lastValue = null; for (Value quantileValue : result.getQuantileValuesList()) { - System.out.printf( - "Value at %s %% quantile : %.3f", percent, quantileValue.getFloatValue()); + Double currentValue = quantileValue.getFloatValue(); + if (lastValue == null || !lastValue.equals(currentValue)) { + System.out.printf( + "Value at %s %% quantile : %.3f", percent, currentValue); + } + lastValue = currentValue; } } } // [END dlp_numerical_stats] - // [START wait_on_dlp_job_completion] - // wait on receiving a job status update over a Google Cloud Pub/Sub subscriber - private static void waitOnJobCompletion( - String projectId, String subscriptionId, String dlpJobName) - throws Exception { - // wait for job completion - final SettableApiFuture done = SettableApiFuture.create(); - - // setup a Pub/Sub subscriber to listen on the job completion status - Subscriber subscriber = - Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) - .build(); - subscriber.startAsync(); - // wait for job completion - try{ - done.get(30, TimeUnit.SECONDS); - } catch (TimeoutException e) { - System.out.println("Unable to verify job complete."); - } - } - // [END wait_on_dlp_job_completion] - // [START dlp_categorical_stats] /** * Calculate categorical statistics for a column in a BigQuery 
table using the DLP API. @@ -195,7 +190,7 @@ private static void waitOnJobCompletion( * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job * completion status. */ - private static void calculateCategoricalStats( + private static void categoricalStatsAnalysis( String projectId, String datasetId, String tableId, @@ -246,8 +241,33 @@ private static void calculateCategoricalStats( DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); String dlpJobName = dlpJob.getName(); - // wait on job completion - waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } // retrieve completed job status DlpJob completedJob = @@ -260,17 +280,14 @@ private static void calculateCategoricalStats( for (CategoricalStatsHistogramBucket bucket : result.getValueFrequencyHistogramBucketsList()) { - System.out.println( - "Most common value occurs " + bucket.getValueFrequencyUpperBound() + " time(s)"); - System.out.println( - "Least common value occurs " + bucket.getValueFrequencyLowerBound() + " time(s)"); + System.out.printf("Most common value occurs %d time(s).\n", + bucket.getValueFrequencyUpperBound()); + System.out.printf("Least common value occurs %d time(s).\n", + 
bucket.getValueFrequencyLowerBound()); for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) { - System.out.println( - "Value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)."); + System.out.printf("Value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), + valueFrequency.getCount()); } } } catch (Exception e) { @@ -344,8 +361,33 @@ private static void calculateKAnonymity( DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); String dlpJobName = dlpJob.getName(); - // wait on job completion - waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } // retrieve completed job status DlpJob completedJob = @@ -357,12 +399,9 @@ private static void calculateKAnonymity( KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult(); for (KAnonymityHistogramBucket result : kanonymityResult.getEquivalenceClassHistogramBucketsList()) { - System.out.println( - "Bucket size range: [" - + result.getEquivalenceClassSizeLowerBound() - + ", " - + result.getEquivalenceClassSizeUpperBound() - + "]"); + System.out.printf("Bucket size range: [%d, %d]\n", + result.getEquivalenceClassSizeLowerBound(), + 
result.getEquivalenceClassSizeUpperBound()); for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) { List quasiIdValues = @@ -456,8 +495,33 @@ private static void calculateLDiversity( DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); String dlpJobName = dlpJob.getName(); - // wait on job completion - waitOnJobCompletion(projectId, subscriptionId, dlpJobName); + final SettableApiFuture done = SettableApiFuture.create(); + + // setup a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // wait for job completion + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } // retrieve completed job status DlpJob completedJob = @@ -481,12 +545,9 @@ private static void calculateLDiversity( System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { - System.out.println( - "\t\tSensitive value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)."); + System.out.printf("\t\tSensitive value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), + valueFrequency.getCount()); } } } @@ -572,11 +633,11 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("n")) { // numerical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateNumericalStats(projectId, datasetId, tableId, columnName, 
topicId, subscriptionId); + numericalStatsAnalysis(projectId, datasetId, tableId, columnName, topicId, subscriptionId); } else if (cmd.hasOption("c")) { // categorical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateCategoricalStats(projectId, datasetId, tableId, columnName, topicId, subscriptionId); + categoricalStatsAnalysis(projectId, datasetId, tableId, columnName, topicId, subscriptionId); } else if (cmd.hasOption("k")) { // k-anonymity analysis List quasiIdColumnNames = diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 3f163e42a29..2ea7a41845c 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -20,14 +20,20 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.PrintStream; +import java.nio.file.Files; + +import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.nio.file.Paths; +import java.util.Arrays; @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName @@ -67,28 +73,32 @@ public void testDeidStringMasksCharacters() throws Exception { } @Test - public void testDeidStringPerformsFpe() throws Exception { + public void testDeidReidFpe() throws Exception { + + // Test DeID String text = "\"My SSN is 372819127\""; DeIdentification.main( new String[] { "-f", text, "-wrappedKey", wrappedKey, - "-keyName", keyName + "-keyName", keyName, + "-commonAlphabet", "NUMERIC", }); String output = bout.toString(); assertFalse( "Response contains original SSN.", output.contains("372819127")); - 
assertThat(output, containsString("My SSN is ")); + assertTrue(output.matches("My SSN is \\d+\n")); } @Test public void testDeidentifyWithDateShift() throws Exception { + String outputPath = "src/test/resources/results.temp.csv"; DeIdentification.main( new String[] { "-d", "-inputCsvPath", "src/test/resources/dates.csv", - "-outputCsvPath", "src/test/resources/results.temp.csv", + "-outputCsvPath", outputPath, "-dateFields", "birth_date,register_date", "-lowerBoundDays", "5", "-upperBoundDays", "5", @@ -99,6 +109,13 @@ public void testDeidentifyWithDateShift() throws Exception { String output = bout.toString(); assertThat( output, containsString("Successfully saved date-shift output to: results.temp.csv")); + + // Compare the result against an expected output file + byte[] resultCsv = Files.readAllBytes(Paths.get(outputPath)); + byte[] correctCsv = Files.readAllBytes(Paths.get( + "src/test/resources/results.correct.csv")); + + assertTrue(Arrays.equals(resultCsv, correctCsv)); } @After diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 9aebae7d8d8..7b07d1b6254 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -93,7 +93,6 @@ public void testImageFileInspectionReturnsInfoTypes() throws Exception { // Requires that bucket by the specified name exists @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testGcsFileInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-gcs", @@ -111,7 +110,6 @@ public void testGcsFileInspectionReturnsInfoTypes() throws Exception { // Requires a Datastore kind containing an entity // with phone number and email address properties. 
@Test - @Ignore // Pubsub tests are flakey when run consecutively public void testDatastoreInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-ds", @@ -126,7 +124,6 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { } @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testBigqueryInspectionReturnsInfoTypes() throws Exception { Inspect.main(new String[] { "-bq", diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index e15675eef60..76d75baf1d3 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -53,7 +53,6 @@ public void setUp() { } @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testNumericalStats() throws Exception { RiskAnalysis.main( new String[] { @@ -68,7 +67,6 @@ public void testNumericalStats() throws Exception { } @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testCategoricalStats() throws Exception { RiskAnalysis.main( new String[] { @@ -81,11 +79,11 @@ public void testCategoricalStats() throws Exception { }); String output = bout.toString(); - assertThat(output, containsString("Most common value occurs")); + assertTrue(Pattern.compile("Most common value occurs \\d time").matcher(output).find()); + assertTrue(Pattern.compile("Least common value occurs \\d time").matcher(output).find()); } @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testKAnonymity() throws Exception { RiskAnalysis.main(new String[]{ "-k", @@ -102,7 +100,6 @@ public void testKAnonymity() throws Exception { } @Test - @Ignore // Pubsub tests are flakey when run consecutively public void testLDiversity() throws Exception { RiskAnalysis.main( new String[] { diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java index 
7c8b921b294..3d0ef2356a9 100644 --- a/dlp/src/test/java/com/example/dlp/TemplatesIT.java +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -23,7 +23,6 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import java.util.Date; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.junit.After; @@ -31,6 +30,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.util.UUID; @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName @@ -80,7 +80,7 @@ public void testDeleteInspectTemplate() throws Exception { // Extract a Template ID Templates.main(new String[] { "-l" }); String output = bout.toString(); - Matcher templateIds = Pattern.compile("template[0-9]+").matcher(output); + Matcher templateIds = Pattern.compile("template(\\w|\\-)+").matcher(output); assertTrue(templateIds.find()); String templateId = templateIds.group(0); bout.reset(); diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java index 352706dcff7..c3e9ef521a0 100644 --- a/dlp/src/test/java/com/example/dlp/TriggersIT.java +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -23,7 +23,7 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import java.util.Date; +import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.junit.After; diff --git a/dlp/src/test/resources/results.correct.csv b/dlp/src/test/resources/results.correct.csv new file mode 100644 index 00000000000..5b078fe825a --- /dev/null +++ b/dlp/src/test/resources/results.correct.csv @@ -0,0 +1,5 @@ +name,birth_date,credit_card,register_date +Ann,1970-01-06,4532908762519852,1996-07-26 +James,1988-03-11,4301261899725540,2001-04-14 +Dan,1945-08-19,4620761856015295,2011-11-20 +Laura,1992-11-08,4564981067258901,2017-01-09 From 9af924dd1fee614642dfa2f9d93771e5ff18d4fd Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 
Mar 2018 18:16:22 -0700 Subject: [PATCH 19/23] Remove mvn clean verify failure --- dlp/src/main/java/com/example/dlp/DeIdentification.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 4d11cfd37bc..914c546cb97 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -134,7 +134,6 @@ private static void deIdentifyWithMask( System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithMask: " + e.getMessage()); - System.out.println(e.getStackTrace()); } } // [END dlp_deidentify_mask] From cef37d541b309a5f52aa47a567c514edc7d59ff3 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 19:45:48 -0700 Subject: [PATCH 20/23] Add ReID FPE sample --- .../com/example/dlp/DeIdentification.java | 154 +++++++++++++++--- .../com/example/dlp/DeIdentificationIT.java | 24 ++- 2 files changed, 154 insertions(+), 24 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 914c546cb97..5bb4b5e5005 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -19,24 +19,29 @@ import com.google.cloud.ServiceOptions; import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.common.io.BaseEncoding; -import com.google.privacy.dlp.v2.ByteContentItem; import com.google.privacy.dlp.v2.CharacterMaskConfig; import com.google.privacy.dlp.v2.ContentItem; import com.google.privacy.dlp.v2.CryptoKey; import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig; import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import com.google.privacy.dlp.v2.CustomInfoType; +import com.google.privacy.dlp.v2.CustomInfoType.SurrogateType; import 
com.google.privacy.dlp.v2.DateShiftConfig; import com.google.privacy.dlp.v2.DeidentifyConfig; import com.google.privacy.dlp.v2.DeidentifyContentRequest; import com.google.privacy.dlp.v2.DeidentifyContentResponse; import com.google.privacy.dlp.v2.FieldId; import com.google.privacy.dlp.v2.FieldTransformation; +import com.google.privacy.dlp.v2.InfoType; import com.google.privacy.dlp.v2.InfoTypeTransformations; import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation; +import com.google.privacy.dlp.v2.InspectConfig; import com.google.privacy.dlp.v2.KmsWrappedCryptoKey; import com.google.privacy.dlp.v2.PrimitiveTransformation; import com.google.privacy.dlp.v2.ProjectName; import com.google.privacy.dlp.v2.RecordTransformations; +import com.google.privacy.dlp.v2.ReidentifyContentRequest; +import com.google.privacy.dlp.v2.ReidentifyContentResponse; import com.google.privacy.dlp.v2.Table; import com.google.privacy.dlp.v2.Value; import com.google.protobuf.ByteString; @@ -46,7 +51,6 @@ import java.io.File; import java.io.FileReader; import java.io.FileWriter; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; @@ -82,14 +86,11 @@ private static void deIdentifyWithMask( // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - ByteContentItem byteContentItem = - ByteContentItem.newBuilder() - .setType(ByteContentItem.BytesType.TEXT_UTF8) - .setData(ByteString.copyFrom(string, StandardCharsets.UTF_8)) + ContentItem contentItem = + ContentItem.newBuilder() + .setValue(string) .build(); - ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); - CharacterMaskConfig characterMaskConfig = CharacterMaskConfig.newBuilder() .setMaskingCharacter(maskingCharacter.toString()) @@ -130,7 +131,7 @@ private static void deIdentifyWithMask( // Print the character-masked input value // e.g. 
"My SSN is 123456789" --> "My SSN is *********" - String result = response.getItem().getByteItem().getData().toStringUtf8(); + String result = response.getItem().getValue(); System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithMask: " + e.getMessage()); @@ -154,16 +155,11 @@ private static void deIdentifyWithFpe( FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey, - String projectId) { + String projectId, + String surrogateType) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - ByteContentItem byteContentItem = - ByteContentItem.newBuilder() - .setData(ByteString.copyFrom(string, StandardCharsets.UTF_8)) - .build(); - - ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); // Create the format-preserving encryption (FPE) configuration KmsWrappedCryptoKey kmsWrappedCryptoKey = @@ -178,6 +174,7 @@ private static void deIdentifyWithFpe( CryptoReplaceFfxFpeConfig.newBuilder() .setCryptoKey(cryptoKey) .setCommonAlphabet(alphabet) + .setSurrogateInfoType(InfoType.newBuilder().setName(surrogateType).build()) .build(); // Create the deidentification transformation configuration @@ -214,7 +211,7 @@ private static void deIdentifyWithFpe( // Print the deidentified input value // e.g. "My SSN is 123456789" --> "My SSN is 7261298621" - String result = response.getItem().getByteItem().getData().toStringUtf8(); + String result = response.getItem().getValue(); System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); @@ -222,6 +219,105 @@ private static void deIdentifyWithFpe( } // [END dlp_deidentify_fpe] + // [START dlp_reidentify_fpe] + /** + * Reidentify a string by decrypting sensitive information while preserving format. + * + * @param string The string to reidentify.
+ * @param alphabet The set of characters used when encrypting the input. For more information, + * see cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify + * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. + * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. + * @param projectId ID of Google Cloud project to run the API under. + * @param surrogateType The name of the surrogate custom info type to use + * during the encryption process. + */ + private static void reIdentifyWithFpe( + String string, + FfxCommonNativeAlphabet alphabet, + String keyName, + String wrappedKey, + String projectId, + String surrogateType) { + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); + + + InfoType surrogateTypeObject = InfoType.newBuilder() + .setName(surrogateType) + .build(); + + // Create the format-preserving encryption (FPE) configuration + KmsWrappedCryptoKey kmsWrappedCryptoKey = + KmsWrappedCryptoKey.newBuilder() + .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) + .setCryptoKeyName(keyName) + .build(); + + CryptoKey cryptoKey = CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build(); + + CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = + CryptoReplaceFfxFpeConfig.newBuilder() + .setCryptoKey(cryptoKey) + .setCommonAlphabet(alphabet) + .setSurrogateInfoType(surrogateTypeObject) + .build(); + + // Create the deidentification transformation configuration + PrimitiveTransformation primitiveTransformation = + PrimitiveTransformation.newBuilder() + .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig) + .build(); + + InfoTypeTransformation infoTypeTransformationObject = + InfoTypeTransformation.newBuilder() + .setPrimitiveTransformation(primitiveTransformation) + .addInfoTypes(surrogateTypeObject) + .build(); + + InfoTypeTransformations
infoTypeTransformationArray = + InfoTypeTransformations.newBuilder() + .addTransformations(infoTypeTransformationObject) + .build(); + + // Create the inspection config + CustomInfoType customInfoType = CustomInfoType.newBuilder() + .setInfoType(surrogateTypeObject) + .setSurrogateType(SurrogateType.newBuilder().build()) + .build(); + + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addCustomInfoTypes(customInfoType).build(); + + // Create the reidentification request object + DeidentifyConfig reidentifyConfig = + DeidentifyConfig.newBuilder() + .setInfoTypeTransformations(infoTypeTransformationArray) + .build(); + + ReidentifyContentRequest request = + ReidentifyContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setReidentifyConfig(reidentifyConfig) + .setInspectConfig(inspectConfig) + .setItem(contentItem) + .build(); + + // Execute the reidentification request + ReidentifyContentResponse response = dlpServiceClient.reidentifyContent(request); + + // Print the reidentified input value + // e.g.
"My SSN is 7261298621" --> "My SSN is 123456789" + String result = response.getItem().getValue(); + System.out.println(result); + } catch (Exception e) { + System.out.println("Error in reidentifyWithFpe: " + e.getMessage()); + } + } + // [END dlp_reidentify_fpe] + // [START dlp_deidentify_date_shift] /** * @@ -413,6 +509,10 @@ public static void main(String[] args) throws Exception { new Option("f", "fpe", true, "Deidentify with format-preserving encryption."); optionsGroup.addOption(deidentifyFpeOption); + Option reidentifyFpeOption = + new Option("r", "reid", true, "Reidentify with format-preserving encryption."); + optionsGroup.addOption(reidentifyFpeOption); + Option deidentifyDateShiftOption = new Option("d", "date", false, "Deidentify dates in a CSV file."); optionsGroup.addOption(deidentifyDateShiftOption); @@ -424,6 +524,10 @@ public static void main(String[] args) throws Exception { Option.builder("maskingCharacter").hasArg(true).required(false).build(); commandLineOptions.addOption(maskingCharacterOption); + Option surrogateTypeOption = + Option.builder("surrogateType").hasArg(true).required(false).build(); + commandLineOptions.addOption(surrogateTypeOption); + Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); commandLineOptions.addOption(numberToMaskOption); @@ -489,11 +593,12 @@ public static void main(String[] args) throws Exception { String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); String keyName = cmd.getOptionValue(keyNameOption.getOpt()); String val = cmd.getOptionValue(deidentifyFpeOption.getOpt()); + String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt()); FfxCommonNativeAlphabet alphabet = FfxCommonNativeAlphabet.valueOf( cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); - deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId); + deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType); } 
else if (cmd.hasOption("d")) { //deidentify with date shift String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt()); @@ -518,6 +623,17 @@ public static void main(String[] args) throws Exception { wrappedKey, keyName, projectId); + } else if (cmd.hasOption("r")) { + // reidentification with FPE + String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); + String keyName = cmd.getOptionValue(keyNameOption.getOpt()); + String val = cmd.getOptionValue(reidentifyFpeOption.getOpt()); + String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt()); + FfxCommonNativeAlphabet alphabet = + FfxCommonNativeAlphabet.valueOf( + cmd.getOptionValue( + alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); + reIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType); } } } diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 2ea7a41845c..2cae38da069 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -76,19 +76,33 @@ public void testDeidStringMasksCharacters() throws Exception { public void testDeidReidFpe() throws Exception { // Test DeID - String text = "\"My SSN is 372819127\""; + String text = "My SSN is 372819127"; DeIdentification.main( new String[] { - "-f", text, + "-f", "\"" + text + "\"", "-wrappedKey", wrappedKey, "-keyName", keyName, "-commonAlphabet", "NUMERIC", + "-surrogateType", "SSN_TOKEN" }); - String output = bout.toString(); + String deidOutput = bout.toString(); assertFalse( "Response contains original SSN.", - output.contains("372819127")); - assertTrue(output.matches("My SSN is \\d+\n")); + deidOutput.contains("372819127")); + assertTrue(deidOutput.matches("My SSN is SSN_TOKEN\\(9\\):\\d+\n")); + + // Test ReID + bout.flush(); + DeIdentification.main( + new String[] { + "-r", deidOutput.toString().trim(), + "-wrappedKey", 
wrappedKey, + "-keyName", keyName, + "-commonAlphabet", "NUMERIC", + "-surrogateType", "SSN_TOKEN" + }); + String reidOutput = bout.toString(); + assertThat(reidOutput, containsString(text)); } @Test From cd7592112c91ef8e3e871801ddc84b4e573ceb1b Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 21:08:22 -0700 Subject: [PATCH 21/23] Address PR feedback --- .../main/java/com/example/dlp/Inspect.java | 39 +++++++------ .../java/com/example/dlp/RiskAnalysis.java | 55 ++++++++++--------- .../main/java/com/example/dlp/Templates.java | 3 +- dlp/src/test/resources/dates.csv | 2 +- 4 files changed, 53 insertions(+), 46 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 476404606a9..80d1256721e 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -132,8 +132,9 @@ private static void inspectString( } // [END dlp_inspect_string] + // [START dlp_inspect_file] /** - * [START dlp_inspect_file] + * Inspect a local file * * @param filePath The path to a local file to inspect. Can be a text, JPG, or PNG file. 
* @param minLikelihood The minimum likelihood required before returning a match @@ -217,15 +218,13 @@ private static void inspectFile( System.out.println("No findings."); } } catch (Exception e) { - e.printStackTrace(); System.out.println("Error in inspectFile: " + e.getMessage()); } } // [END dlp_inspect_file] + // [START dlp_inspect_gcs] /** - * [START dlp_inspect_gcs] - * * Inspect GCS file for Info types and wait on job completion using Google Cloud Pub/Sub * notification * @@ -285,7 +284,7 @@ private static void inspectGcsFile( .addActions(action) .build(); - // asynchronously submit an inspect job, and wait on results + // Semi-synchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) @@ -298,7 +297,7 @@ private static void inspectGcsFile( final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.of(projectId, subscriptionId), @@ -313,10 +312,11 @@ private static void inspectGcsFile( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (Exception e){ System.out.println("Unable to verify job completion."); } @@ -338,8 +338,8 @@ private static void inspectGcsFile( System.out.println("No findings."); } } - // [END dlp_inspect_gcs] } + // [END dlp_inspect_gcs] // [START dlp_inspect_datastore] /** @@ -403,7 +403,7 @@ private static void inspectDatastore( .addActions(action) .build(); - // asynchronously submit an inspect job, and wait on results + // 
Asynchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) @@ -416,7 +416,7 @@ private static void inspectDatastore( final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.of(projectId, subscriptionId), @@ -431,10 +431,11 @@ private static void inspectDatastore( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (Exception e){ System.out.println("Unable to verify job completion."); } @@ -462,8 +463,9 @@ private static void inspectDatastore( } // [END dlp_inspect_datastore] + // [START dlp_inspect_bigquery] /** - * [START dlp_inspect_bigquery] + * Inspect a BigQuery table * * @param projectId The project ID to run the API call under * @param datasetId The ID of the dataset to inspect, e.g. 
'my_dataset' @@ -524,7 +526,7 @@ private static void inspectBigquery( .addActions(action) .build(); - // asynchronously submit an inspect job, and wait on results + // Asynchronously submit an inspect job, and wait on results CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) @@ -535,10 +537,11 @@ private static void inspectBigquery( System.out.println("Job created with ID:" + dlpJob.getName()); - // wait on job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.of(projectId, subscriptionId), @@ -555,7 +558,7 @@ private static void inspectBigquery( try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (Exception e){ System.out.println("Unable to verify job completion."); } diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index d3ea4a13fe5..a98a40b2caa 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -83,7 +83,7 @@ private static void numericalStatsAnalysis( String subscriptionId) throws Exception { - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { BigQueryTable bigQueryTable = BigQueryTable.newBuilder() @@ -104,7 +104,7 @@ private static void numericalStatsAnalysis( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create action to publish job status notifications over Google Cloud Pub/Sub + 
// Create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -125,7 +125,7 @@ private static void numericalStatsAnalysis( final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.newBuilder() @@ -143,15 +143,16 @@ private static void numericalStatsAnalysis( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { System.out.println("Unable to verify job completion."); } - // retrieve completed job status + // Retrieve completed job status DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); @@ -198,7 +199,7 @@ private static void categoricalStatsAnalysis( String topicId, String subscriptionId){ - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); @@ -222,7 +223,7 @@ private static void categoricalStatsAnalysis( .setTopic(topicName.toString()) .build(); - // create action to publish job status notifications over Google Cloud Pub/Sub + // Create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -243,7 +244,7 @@ private static void categoricalStatsAnalysis( final SettableApiFuture done = 
SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.newBuilder() @@ -261,15 +262,16 @@ private static void categoricalStatsAnalysis( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { System.out.println("Unable to verify job completion."); } - // retrieve completed job status + // Retrieve completed job status DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); @@ -316,7 +318,7 @@ private static void calculateKAnonymity( String topicId, String subscriptionId) throws Exception { - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { List quasiIdFields = @@ -342,7 +344,7 @@ private static void calculateKAnonymity( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create action to publish job status notifications over Google Cloud Pub/Sub + // Create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -363,7 +365,7 @@ private static void calculateKAnonymity( final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( ProjectSubscriptionName.newBuilder() @@ -381,15 +383,16 @@ private static 
void calculateKAnonymity( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { System.out.println("Unable to verify job completion."); } - // retrieve completed job status + // Retrieve completed job status DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); @@ -421,9 +424,8 @@ private static void calculateKAnonymity( } // [END dlp_k_anonymity] + // [START dlp_l_diversity] /** - * [START dlp_l_diversity] - * * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. * * @param projectId The Google Cloud Platform project ID to run the API call under. @@ -445,7 +447,7 @@ private static void calculateLDiversity( String subscriptionId) throws Exception { - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build(); @@ -476,7 +478,7 @@ private static void calculateLDiversity( PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // create action to publish job status notifications over Google Cloud Pub/Sub + // Create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); RiskAnalysisJobConfig riskAnalysisJobConfig = @@ -497,7 +499,7 @@ private static void calculateLDiversity( final SettableApiFuture done = SettableApiFuture.create(); - // setup a Pub/Sub subscriber to listen on the job completion status + // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = 
Subscriber.newBuilder( ProjectSubscriptionName.newBuilder() @@ -515,10 +517,11 @@ private static void calculateLDiversity( .build(); subscriber.startAsync(); - // wait for job completion + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions try{ done.get(1, TimeUnit.MINUTES); - Thread.sleep(500); + Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { System.out.println("Unable to verify job completion."); } @@ -554,8 +557,8 @@ private static void calculateLDiversity( } catch (Exception e) { System.out.println("Error in lDiversityAnalysis: " + e.getMessage()); } - // [END dlp_l_diversity] } + // [END dlp_l_diversity] /** * Command line application to perform risk analysis using the Data Loss Prevention API. Supported diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java index 91bbab558ae..cfdb1e339de 100644 --- a/dlp/src/main/java/com/example/dlp/Templates.java +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -43,8 +43,9 @@ public class Templates { + // [START dlp_create_inspect_template] /** - * [START dlp_create_inspect_template] + * Create a new DLP inspection configuration template. 
* * @param displayName (Optional) The human-readable name to give the template * @param projectId Google Cloud Project ID to call the API under diff --git a/dlp/src/test/resources/dates.csv b/dlp/src/test/resources/dates.csv index 676c2b4567f..290a85dec68 100644 --- a/dlp/src/test/resources/dates.csv +++ b/dlp/src/test/resources/dates.csv @@ -2,4 +2,4 @@ name,birth_date,credit_card,register_date Ann,01/01/1970,4532908762519852,07/21/1996 James,03/06/1988,4301261899725540,04/09/2001 Dan,08/14/1945,4620761856015295,11/15/2011 -Laura,11/03/1992,4564981067258901,01/04/2017 \ No newline at end of file +Laura,11/03/1992,4564981067258901,01/04/2017 From 8ba4acbffe918ce787fc704c7b87342e78faf1dd Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Mon, 19 Mar 2018 22:26:19 -0700 Subject: [PATCH 22/23] Add k-map sample --- .../java/com/example/dlp/RiskAnalysis.java | 199 +++++++++++++++++- .../java/com/example/dlp/RiskAnalysisIT.java | 22 +- 2 files changed, 216 insertions(+), 5 deletions(-) diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index a98a40b2caa..b4b7393aafa 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -27,6 +27,9 @@ import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult; import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass; import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues; import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult; import 
com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass; import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket; @@ -35,9 +38,12 @@ import com.google.privacy.dlp.v2.DlpJob; import com.google.privacy.dlp.v2.FieldId; import com.google.privacy.dlp.v2.GetDlpJobRequest; +import com.google.privacy.dlp.v2.InfoType; import com.google.privacy.dlp.v2.PrivacyMetric; import com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig; import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField; import com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig; import com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig; import com.google.privacy.dlp.v2.ProjectName; @@ -46,7 +52,10 @@ import com.google.privacy.dlp.v2.ValueFrequency; import com.google.pubsub.v1.ProjectSubscriptionName; import com.google.pubsub.v1.ProjectTopicName; + +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -59,6 +68,7 @@ import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import java.util.Iterator; public class RiskAnalysis { @@ -175,6 +185,8 @@ private static void numericalStatsAnalysis( } lastValue = currentValue; } + } catch (Exception e) { + System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage()); } } // [END dlp_numerical_stats] @@ -419,7 +431,7 @@ private static void calculateKAnonymity( } } } catch (Exception e) { - System.out.println("Error in kAnonymityAnalysis: " + e.getMessage()); + System.out.println("Error in calculateKAnonymity: " + e.getMessage()); } } // [END dlp_k_anonymity] @@ -555,11 +567,162 @@ 
private static void calculateLDiversity( } } } catch (Exception e) { - System.out.println("Error in lDiversityAnalysis: " + e.getMessage()); + System.out.println("Error in calculateLDiversity: " + e.getMessage()); } } // [END dlp_l_diversity] + // [START dlp_k_map] + /** + * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery table. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). + * @param infoTypes The infoTypes corresponding to each quasi-id column + * @param regionCode An ISO-3166-1 region code specifying the k-map distribution region + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. 
+ */ + private static void calculateKMap( + String projectId, + String datasetId, + String tableId, + List quasiIds, + List infoTypes, + String regionCode, + String topicId, + String subscriptionId) + throws Exception { + + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + Iterator quasiIdsIterator = quasiIds.iterator(); + Iterator infoTypesIterator = infoTypes.iterator(); + + if (quasiIds.size() != infoTypes.size()) { + throw new IllegalArgumentException("The numbers of quasi-IDs and infoTypes must be equal!"); + } + + ArrayList taggedFields = new ArrayList(); + + while (quasiIdsIterator.hasNext() || infoTypesIterator.hasNext()) { + taggedFields.add(TaggedField.newBuilder() + .setField(FieldId.newBuilder().setName(quasiIdsIterator.next()).build()) + .setInfoType(infoTypesIterator.next()) + .build()); + } + + KMapEstimationConfig kmapConfig = + KMapEstimationConfig.newBuilder() + .addAllQuasiIds(taggedFields) + .setRegionCode(regionCode) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder().setKMapEstimationConfig(kmapConfig).build(); + + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() + .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); + + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) + .build(); + + DlpJob dlpJob 
= dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try{ + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } + + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult(); + for (KMapEstimationHistogramBucket result : + kmapResult.getKMapEstimationHistogramList()) { + + System.out.printf("\tAnonymity range: [%d, %d]\n", + result.getMinAnonymity(), + result.getMaxAnonymity()); + System.out.printf("\tSize: %d\n", result.getBucketSize()); + + for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) { + String quasiIdValues = + valueBucket + .getQuasiIdsValuesList() + .stream() + .map(v -> { + String s = v.toString(); + return s.substring(s.indexOf(':') + 1).trim(); + }) + .collect(Collectors.joining(", ")); + + + System.out.printf("\tValues: {%s}\n", 
quasiIdValues); + System.out.printf("\tEstimated k-map anonymity: %d\n", + valueBucket.getEstimatedAnonymity()); + } + } + } catch (Exception e) { + System.out.println("Error in calculateKMap: " + e.getMessage()); + } + } + // [END dlp_k_map] + /** * Command line application to perform risk analysis using the Data Loss Prevention API. Supported * data format: BigQuery tables @@ -575,9 +738,12 @@ public static void main(String[] args) throws Exception { Option categoricalAnalysisOption = new Option("c", "categorical"); optionsGroup.addOption(categoricalAnalysisOption); - Option kanonymityOption = new Option("k", "kAnonymity"); + Option kanonymityOption = new Option("a", "kAnonymity"); optionsGroup.addOption(kanonymityOption); + Option kmapOption = new Option("m", "kAnonymity"); + optionsGroup.addOption(kmapOption); + Option ldiversityOption = new Option("l", "lDiversity"); optionsGroup.addOption(ldiversityOption); @@ -607,10 +773,19 @@ public static void main(String[] args) throws Exception { Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); commandLineOptions.addOption(sensitiveAttributeOption); + Option regionCodeOption = + Option.builder("regionCode").hasArg(true).required(false).build(); + commandLineOptions.addOption(regionCodeOption); + Option quasiIdColumnNamesOption = Option.builder("quasiIdColumnNames").hasArg(true).required(false).build(); + quasiIdColumnNamesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(quasiIdColumnNamesOption); + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -630,9 +805,20 @@ public static void main(String[] args) throws Exception { String projectId = cmd.getOptionValue(projectIdOption.getOpt(), 
ServiceOptions.getDefaultProjectId()); + String regionCode = cmd.getOptionValue(regionCodeOption.getOpt(), "US"); + String topicId = cmd.getOptionValue(topicIdOption.getOpt()); String subscriptionId = cmd.getOptionValue(subscriptionIdOption.getOpt()); + List infoTypesList = Collections.emptyList(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + if (cmd.hasOption("n")) { // numerical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); @@ -641,12 +827,17 @@ public static void main(String[] args) throws Exception { // categorical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); categoricalStatsAnalysis(projectId, datasetId, tableId, columnName, topicId, subscriptionId); - } else if (cmd.hasOption("k")) { + } else if (cmd.hasOption("a")) { // k-anonymity analysis List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); calculateKAnonymity( projectId, datasetId, tableId, quasiIdColumnNames, topicId, subscriptionId); + } else if (cmd.hasOption("m")) { + // k-map analysis + List quasiIdColumnNames = + Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + calculateKMap(projectId, datasetId, tableId, quasiIdColumnNames, infoTypesList, regionCode, topicId, subscriptionId); } else if (cmd.hasOption("l")) { // l-diversity analysis String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 76d75baf1d3..c75c5279be3 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -86,7 +86,7 @@ public void 
testCategoricalStats() throws Exception { @Test public void testKAnonymity() throws Exception { RiskAnalysis.main(new String[]{ - "-k", + "-a", "-datasetId", "integration_tests_dlp", "-tableId", "harmful", "-quasiIdColumnNames", "Age", "Mystery", @@ -117,6 +117,26 @@ public void testLDiversity() throws Exception { assertTrue(output.contains("Sensitive value string_value: \"James\"")); } + @Test + public void testKMap() throws Exception { + RiskAnalysis.main( + new String[] { + "-m", + "-datasetId", "integration_tests_dlp", + "-tableId", "harmful", + "-topicId", topicId, + "-subscriptionId", subscriptionId, + "-regionCode", "US", + "-quasiIdColumnNames", "Age", "Gender", + "-infoTypes", "AGE", "GENDER" + }); + String output = bout.toString(); + + assertTrue(Pattern.compile("Anonymity range: \\[\\d, \\d]").matcher(output).find()); + assertTrue(Pattern.compile("Size: \\d").matcher(output).find()); + assertTrue(Pattern.compile("Values: \\{\\d{2}, \"Female\"\\}").matcher(output).find()); + } + @After public void tearDown() { System.setOut(null); From 58625f9b345cc62ea2085c1c4b4946ebc62bde04 Mon Sep 17 00:00:00 2001 From: Jisha Abubaker Date: Mon, 19 Mar 2018 23:28:52 -0700 Subject: [PATCH 23/23] checkstyle fixes --- dlp/pom.xml | 1 + .../com/example/dlp/DeIdentification.java | 45 ++-- .../main/java/com/example/dlp/Inspect.java | 94 ++++---- .../main/java/com/example/dlp/Metadata.java | 6 +- .../main/java/com/example/dlp/QuickStart.java | 4 +- .../java/com/example/dlp/RiskAnalysis.java | 216 +++++++++--------- .../main/java/com/example/dlp/Templates.java | 13 +- .../main/java/com/example/dlp/Triggers.java | 9 +- .../com/example/dlp/DeIdentificationIT.java | 72 +++--- .../test/java/com/example/dlp/InspectIT.java | 89 +++++--- dlp/src/test/java/com/example/dlp/JobsIT.java | 20 +- .../test/java/com/example/dlp/MetadataIT.java | 10 +- .../test/java/com/example/dlp/RedactIT.java | 12 +- .../java/com/example/dlp/RiskAnalysisIT.java | 99 +++++--- 
.../java/com/example/dlp/TemplatesIT.java | 30 ++- .../test/java/com/example/dlp/TriggersIT.java | 40 ++-- 16 files changed, 407 insertions(+), 353 deletions(-) diff --git a/dlp/pom.xml b/dlp/pom.xml index d1adb7f88d5..9108bd49f4e 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -30,6 +30,7 @@ com.google.cloud.samples shared-configuration 1.0.8 + diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 5bb4b5e5005..8ffa46bc0e9 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -86,10 +86,7 @@ private static void deIdentifyWithMask( // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - ContentItem contentItem = - ContentItem.newBuilder() - .setValue(string) - .build(); + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); CharacterMaskConfig characterMaskConfig = CharacterMaskConfig.newBuilder() @@ -99,9 +96,7 @@ private static void deIdentifyWithMask( // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = - PrimitiveTransformation.newBuilder() - .setCharacterMaskConfig(characterMaskConfig) - .build(); + PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build(); InfoTypeTransformation infoTypeTransformationObject = InfoTypeTransformation.newBuilder() @@ -224,13 +219,13 @@ private static void deIdentifyWithFpe( * Reidentify a string by encrypting sensitive information while preserving format. * * @param string The string to reidentify. - * @param alphabet The set of characters used when encrypting the input. For more information, - * see cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify + * @param alphabet The set of characters used when encrypting the input. 
For more information, see + * cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. * @param projectId ID of Google Cloud project to run the API under. - * @param surrogateType The name of the surrogate custom info type to used - * during the encryption process. + * @param surrogateType The name of the surrogate custom info type to used during the encryption + * process. */ private static void reIdentifyWithFpe( String string, @@ -243,10 +238,7 @@ private static void reIdentifyWithFpe( try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); - - InfoType surrogateTypeObject = InfoType.newBuilder() - .setName(surrogateType) - .build(); + InfoType surrogateTypeObject = InfoType.newBuilder().setName(surrogateType).build(); // Create the format-preserving encryption (FPE) configuration KmsWrappedCryptoKey kmsWrappedCryptoKey = @@ -282,14 +274,14 @@ private static void reIdentifyWithFpe( .build(); // Create the inspection config - CustomInfoType customInfoType = CustomInfoType.newBuilder() - .setInfoType(surrogateTypeObject) - .setSurrogateType(SurrogateType.newBuilder().build()) - .build(); + CustomInfoType customInfoType = + CustomInfoType.newBuilder() + .setInfoType(surrogateTypeObject) + .setSurrogateType(SurrogateType.newBuilder().build()) + .build(); InspectConfig inspectConfig = - InspectConfig.newBuilder() - .addCustomInfoTypes(customInfoType).build(); + InspectConfig.newBuilder().addCustomInfoTypes(customInfoType).build(); // Create the reidentification request object DeidentifyConfig reidentifyConfig = @@ -320,7 +312,6 @@ private static void reIdentifyWithFpe( // [START dlp_deidentify_date_shift] /** - * * @param inputCsvPath The path to the CSV file to deidentify * @param outputCsvPath (Optional) path to 
the output CSV file * @param dateFields The list of (date) fields in the CSV file to date shift @@ -443,10 +434,12 @@ private static void deidentifyWithDateShift( // write out each row for (Table.Row outputRow : outputRows) { - String row = outputRow.getValuesList() - .stream() - .map(value -> value.getStringValue()) - .collect(Collectors.joining(",")); + String row = + outputRow + .getValuesList() + .stream() + .map(value -> value.getStringValue()) + .collect(Collectors.joining(",")); bufferedWriter.append(row + "\n"); } diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 80d1256721e..d4adc6d33ba 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -54,7 +54,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; @@ -66,7 +65,6 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; - public class Inspect { /** @@ -178,10 +176,11 @@ private static void inspectFile( } byte[] data = Files.readAllBytes(Paths.get(filePath)); - ByteContentItem byteContentItem = ByteContentItem.newBuilder() - .setType(bytesType) - .setData(ByteString.copyFrom(data)) - .build(); + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(bytesType) + .setData(ByteString.copyFrom(data)) + .build(); ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); FindingLimits findingLimits = @@ -300,24 +299,27 @@ private static void inspectGcsFile( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.of(projectId, subscriptionId), - (pubsubMessage, ackReplyConsumer) -> { - if 
(pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available - } catch (Exception e){ + } catch (Exception e) { System.out.println("Unable to verify job completion."); } @@ -353,7 +355,7 @@ private static void inspectGcsFile( * @param maxFindings max number of findings * @param topicId Google Cloud Pub/Sub topic to notify job status updates * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status - * updates + * updates */ private static void inspectDatastore( String projectId, @@ -419,28 +421,30 @@ private static void inspectDatastore( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.of(projectId, subscriptionId), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job 
completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available - } catch (Exception e){ + } catch (Exception e) { System.out.println("Unable to verify job completion."); } - DlpJob completedJob = dlpServiceClient.getDlpJob( GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); @@ -513,9 +517,7 @@ private static void inspectBigquery( ProjectTopicName topic = ProjectTopicName.of(projectId, topicId); Action.PublishToPubSub publishToPubSub = - Action.PublishToPubSub.newBuilder() - .setTopic(topic.toString()) - .build(); + Action.PublishToPubSub.newBuilder().setTopic(topic.toString()).build(); Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); @@ -544,26 +546,28 @@ private static void inspectBigquery( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.of(projectId, subscriptionId), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available - } catch (Exception e){ + } catch (Exception e) { System.out.println("Unable to verify job 
completion."); } - DlpJob completedJob = dlpServiceClient.getDlpJob( GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java b/dlp/src/main/java/com/example/dlp/Metadata.java index 57cabfa874c..3b7a85aec5b 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -42,10 +42,8 @@ private static void listInfoTypes(String filter, String languageCode) throws Exc // Instantiate a DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - ListInfoTypesRequest listInfoTypesRequest = ListInfoTypesRequest.newBuilder() - .setFilter(filter) - .setLanguageCode(languageCode) - .build(); + ListInfoTypesRequest listInfoTypesRequest = + ListInfoTypesRequest.newBuilder().setFilter(filter).setLanguageCode(languageCode).build(); ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(listInfoTypesRequest); List infoTypeDescriptions = infoTypesResponse.getInfoTypesList(); for (InfoTypeDescription infoTypeDescription : infoTypeDescriptions) { diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java b/dlp/src/main/java/com/example/dlp/QuickStart.java index 2728e4674cb..52946585907 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -61,9 +61,7 @@ public static void main(String[] args) throws Exception { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { InspectConfig.FindingLimits findingLimits = - InspectConfig.FindingLimits.newBuilder() - .setMaxFindingsPerItem(maxFindings) - .build(); + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerItem(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index b4b7393aafa..161129d311b 100644 --- 
a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -52,10 +52,10 @@ import com.google.privacy.dlp.v2.ValueFrequency; import com.google.pubsub.v1.ProjectSubscriptionName; import com.google.pubsub.v1.ProjectTopicName; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -68,7 +68,6 @@ import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import java.util.Iterator; public class RiskAnalysis { @@ -138,24 +137,24 @@ private static void numericalStatsAnalysis( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { @@ -180,8 +179,7 @@ private static void numericalStatsAnalysis( for (Value quantileValue : 
result.getQuantileValuesList()) { Double currentValue = quantileValue.getFloatValue(); if (lastValue == null || !lastValue.equals(currentValue)) { - System.out.printf( - "Value at %s %% quantile : %.3f", percent, currentValue); + System.out.printf("Value at %s %% quantile : %.3f", percent, currentValue); } lastValue = currentValue; } @@ -209,7 +207,7 @@ private static void categoricalStatsAnalysis( String tableId, String columnName, String topicId, - String subscriptionId){ + String subscriptionId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { @@ -231,9 +229,8 @@ private static void categoricalStatsAnalysis( ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId); - PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder() - .setTopic(topicName.toString()) - .build(); + PublishToPubSub publishToPubSub = + PublishToPubSub.newBuilder().setTopic(topicName.toString()).build(); // Create action to publish job status notifications over Google Cloud Pub/Sub Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); @@ -259,24 +256,24 @@ private static void categoricalStatsAnalysis( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + 
}) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { @@ -294,14 +291,14 @@ private static void categoricalStatsAnalysis( for (CategoricalStatsHistogramBucket bucket : result.getValueFrequencyHistogramBucketsList()) { - System.out.printf("Most common value occurs %d time(s).\n", - bucket.getValueFrequencyUpperBound()); - System.out.printf("Least common value occurs %d time(s).\n", - bucket.getValueFrequencyLowerBound()); + System.out.printf( + "Most common value occurs %d time(s).\n", bucket.getValueFrequencyUpperBound()); + System.out.printf( + "Least common value occurs %d time(s).\n", bucket.getValueFrequencyLowerBound()); for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) { - System.out.printf("Value %s occurs %d time(s).\n", - valueFrequency.getValue().toString(), - valueFrequency.getCount()); + System.out.printf( + "Value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), valueFrequency.getCount()); } } } catch (Exception e) { @@ -380,24 +377,24 @@ private static void calculateKAnonymity( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && 
pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { @@ -414,9 +411,9 @@ private static void calculateKAnonymity( KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult(); for (KAnonymityHistogramBucket result : kanonymityResult.getEquivalenceClassHistogramBucketsList()) { - System.out.printf("Bucket size range: [%d, %d]\n", - result.getEquivalenceClassSizeLowerBound(), - result.getEquivalenceClassSizeUpperBound()); + System.out.printf( + "Bucket size range: [%d, %d]\n", + result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound()); for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) { List quasiIdValues = @@ -514,24 +511,24 @@ private static void calculateLDiversity( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); 
subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { @@ -560,9 +557,9 @@ private static void calculateLDiversity( System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { - System.out.printf("\t\tSensitive value %s occurs %d time(s).\n", - valueFrequency.getValue().toString(), - valueFrequency.getCount()); + System.out.printf( + "\t\tSensitive value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), valueFrequency.getCount()); } } } @@ -574,7 +571,8 @@ private static void calculateLDiversity( // [START dlp_k_map] /** - * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery table. + * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery + * table. * * @param projectId The Google Cloud Platform project ID to run the API call under. * @param datasetId The BigQuery dataset to analyze. 
@@ -610,10 +608,11 @@ private static void calculateKMap( ArrayList taggedFields = new ArrayList(); while (quasiIdsIterator.hasNext() || infoTypesIterator.hasNext()) { - taggedFields.add(TaggedField.newBuilder() - .setField(FieldId.newBuilder().setName(quasiIdsIterator.next()).build()) - .setInfoType(infoTypesIterator.next()) - .build()); + taggedFields.add( + TaggedField.newBuilder() + .setField(FieldId.newBuilder().setName(quasiIdsIterator.next()).build()) + .setInfoType(infoTypesIterator.next()) + .build()); } KMapEstimationConfig kmapConfig = @@ -660,24 +659,24 @@ private static void calculateKMap( // Set up a Pub/Sub subscriber to listen on the job completion status Subscriber subscriber = Subscriber.newBuilder( - ProjectSubscriptionName.newBuilder() - .setProject(projectId) - .setSubscription(subscriptionId) - .build(), - (pubsubMessage, ackReplyConsumer) -> { - if (pubsubMessage.getAttributesCount() > 0 - && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { - // notify job completion - done.set(true); - ackReplyConsumer.ack(); - } - }) + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) .build(); subscriber.startAsync(); // Wait for job completion semi-synchronously // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions - try{ + try { done.get(1, TimeUnit.MINUTES); Thread.sleep(500); // Wait for the job to become available } catch (TimeoutException e) { @@ -692,12 +691,10 @@ private static void calculateKMap( AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult(); - for (KMapEstimationHistogramBucket result : - 
kmapResult.getKMapEstimationHistogramList()) { + for (KMapEstimationHistogramBucket result : kmapResult.getKMapEstimationHistogramList()) { - System.out.printf("\tAnonymity range: [%d, %d]\n", - result.getMinAnonymity(), - result.getMaxAnonymity()); + System.out.printf( + "\tAnonymity range: [%d, %d]\n", result.getMinAnonymity(), result.getMaxAnonymity()); System.out.printf("\tSize: %d\n", result.getBucketSize()); for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) { @@ -705,16 +702,16 @@ private static void calculateKMap( valueBucket .getQuasiIdsValuesList() .stream() - .map(v -> { - String s = v.toString(); - return s.substring(s.indexOf(':') + 1).trim(); - }) + .map( + v -> { + String s = v.toString(); + return s.substring(s.indexOf(':') + 1).trim(); + }) .collect(Collectors.joining(", ")); - System.out.printf("\tValues: {%s}\n", quasiIdValues); - System.out.printf("\tEstimated k-map anonymity: %d\n", - valueBucket.getEstimatedAnonymity()); + System.out.printf( + "\tEstimated k-map anonymity: %d\n", valueBucket.getEstimatedAnonymity()); } } } catch (Exception e) { @@ -773,8 +770,7 @@ public static void main(String[] args) throws Exception { Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); commandLineOptions.addOption(sensitiveAttributeOption); - Option regionCodeOption = - Option.builder("regionCode").hasArg(true).required(false).build(); + Option regionCodeOption = Option.builder("regionCode").hasArg(true).required(false).build(); commandLineOptions.addOption(regionCodeOption); Option quasiIdColumnNamesOption = @@ -837,7 +833,15 @@ public static void main(String[] args) throws Exception { // k-map analysis List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - calculateKMap(projectId, datasetId, tableId, quasiIdColumnNames, infoTypesList, regionCode, topicId, subscriptionId); + calculateKMap( + projectId, + datasetId, + tableId, + quasiIdColumnNames, + infoTypesList, 
+ regionCode, + topicId, + subscriptionId); } else if (cmd.hasOption("l")) { // l-diversity analysis String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java index cfdb1e339de..da4c5eb88c6 100644 --- a/dlp/src/main/java/com/example/dlp/Templates.java +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -111,7 +111,8 @@ private static void listInspectTemplates(String projectId) { ListInspectTemplatesRequest request = ListInspectTemplatesRequest.newBuilder() .setParent(ProjectName.of(projectId).toString()) - .setPageSize(1).build(); + .setPageSize(1) + .build(); ListInspectTemplatesPagedResponse response = dlpServiceClient.listInspectTemplates(request); ListInspectTemplatesPage page = response.getPage(); @@ -167,7 +168,6 @@ private static void deleteInspectTemplate(String projectId, String templateId) { /** Command line application to create, list and delete DLP inspect templates. 
*/ public static void main(String[] args) throws Exception { - OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); @@ -245,8 +245,13 @@ public static void main(String[] args) throws Exception { } int maxFindings = Integer.valueOf(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); createInspectTemplate( - displayName, templateId, description, projectId, - infoTypesList, minLikelihood, maxFindings); + displayName, + templateId, + description, + projectId, + infoTypesList, + minLikelihood, + maxFindings); } else if (cmd.hasOption(listOption.getOpt())) { listInspectTemplates(projectId); diff --git a/dlp/src/main/java/com/example/dlp/Triggers.java b/dlp/src/main/java/com/example/dlp/Triggers.java index 107324602cb..c6e180d58dc 100644 --- a/dlp/src/main/java/com/example/dlp/Triggers.java +++ b/dlp/src/main/java/com/example/dlp/Triggers.java @@ -69,11 +69,12 @@ private static void createTrigger( List infoTypes, Likelihood minLikelihood, int maxFindings, - String projectId) throws Exception { + String projectId) + throws Exception { // instantiate a client DlpServiceClient dlpServiceClient = DlpServiceClient.create(); - try { + try { CloudStorageOptions cloudStorageOptions = CloudStorageOptions.newBuilder() @@ -141,7 +142,8 @@ private static void listTriggers(String projectId) { try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { ListJobTriggersRequest listJobTriggersRequest = ListJobTriggersRequest.newBuilder() - .setParent(ProjectName.of(projectId).toString()).build(); + .setParent(ProjectName.of(projectId).toString()) + .build(); DlpServiceClient.ListJobTriggersPagedResponse response = dlpServiceClient.listJobTriggers(listJobTriggersRequest); response @@ -169,7 +171,6 @@ private static void listTriggers(String projectId) { // [START dlp_delete_trigger] /** - * * Delete a DLP trigger in a project. * * @param projectId The project ID to run the API call under. 
diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index 2cae38da069..ec796c60bbb 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -25,15 +25,13 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.nio.file.Files; - -import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import java.nio.file.Paths; +import java.util.Arrays; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.nio.file.Paths; -import java.util.Arrays; @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName @@ -79,27 +77,35 @@ public void testDeidReidFpe() throws Exception { String text = "My SSN is 372819127"; DeIdentification.main( new String[] { - "-f", "\"" + text + "\"", - "-wrappedKey", wrappedKey, - "-keyName", keyName, - "-commonAlphabet", "NUMERIC", - "-surrogateType", "SSN_TOKEN" + "-f", + "\"" + text + "\"", + "-wrappedKey", + wrappedKey, + "-keyName", + keyName, + "-commonAlphabet", + "NUMERIC", + "-surrogateType", + "SSN_TOKEN" }); String deidOutput = bout.toString(); - assertFalse( - "Response contains original SSN.", - deidOutput.contains("372819127")); + assertFalse("Response contains original SSN.", deidOutput.contains("372819127")); assertTrue(deidOutput.matches("My SSN is SSN_TOKEN\\(9\\):\\d+\n")); // Test ReID bout.flush(); DeIdentification.main( new String[] { - "-r", deidOutput.toString().trim(), - "-wrappedKey", wrappedKey, - "-keyName", keyName, - "-commonAlphabet", "NUMERIC", - "-surrogateType", "SSN_TOKEN" + "-r", + deidOutput.toString().trim(), + "-wrappedKey", + wrappedKey, + "-keyName", + keyName, + "-commonAlphabet", + "NUMERIC", + "-surrogateType", + "SSN_TOKEN" }); String reidOutput = bout.toString(); assertThat(reidOutput, 
containsString(text)); @@ -110,24 +116,30 @@ public void testDeidentifyWithDateShift() throws Exception { String outputPath = "src/test/resources/results.temp.csv"; DeIdentification.main( new String[] { - "-d", - "-inputCsvPath", "src/test/resources/dates.csv", - "-outputCsvPath", outputPath, - "-dateFields", "birth_date,register_date", - "-lowerBoundDays", "5", - "-upperBoundDays", "5", - "-contextField", "name", - "-wrappedKey", wrappedKey, - "-keyName", keyName + "-d", + "-inputCsvPath", + "src/test/resources/dates.csv", + "-outputCsvPath", + outputPath, + "-dateFields", + "birth_date,register_date", + "-lowerBoundDays", + "5", + "-upperBoundDays", + "5", + "-contextField", + "name", + "-wrappedKey", + wrappedKey, + "-keyName", + keyName }); String output = bout.toString(); - assertThat( - output, containsString("Successfully saved date-shift output to: results.temp.csv")); + assertThat(output, containsString("Successfully saved date-shift output to: results.temp.csv")); // Compare the result against an expected output file byte[] resultCsv = Files.readAllBytes(Paths.get(outputPath)); - byte[] correctCsv = Files.readAllBytes(Paths.get( - "src/test/resources/results.correct.csv")); + byte[] correctCsv = Files.readAllBytes(Paths.get("src/test/resources/results.correct.csv")); assertTrue(Arrays.equals(resultCsv, correctCsv)); } diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 7b07d1b6254..60f078dcb26 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -19,13 +19,11 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import 
org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -59,10 +57,7 @@ public void setUp() { public void testStringInspectionReturnsInfoTypes() throws Exception { String text = "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; - Inspect.main(new String[] { - "-s", text, - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main(new String[] {"-s", text, "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"}); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); @@ -71,10 +66,10 @@ public void testStringInspectionReturnsInfoTypes() throws Exception { @Test public void testTextFileInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[]{ - "-f", "src/test/resources/test.txt", - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] { + "-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); assertThat(output, containsString("EMAIL_ADDRESS")); @@ -82,10 +77,10 @@ public void testTextFileInspectionReturnsInfoTypes() throws Exception { @Test public void testImageFileInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[]{ - "-f", "src/test/resources/test.png", - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] { + "-f", "src/test/resources/test.png", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); assertThat(output, containsString("EMAIL_ADDRESS")); @@ -94,14 +89,21 @@ public void testImageFileInspectionReturnsInfoTypes() throws Exception { // Requires that bucket by the specified name exists @Test public void testGcsFileInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] { - "-gcs", - "-bucketName", bucketName, - "-topicId", topicId, - "-subscriptionId", 
subscriptionId, - "-fileName", "test.txt", - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] { + "-gcs", + "-bucketName", + bucketName, + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-fileName", + "test.txt", + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); assertThat(output, containsString("EMAIL_ADDRESS")); @@ -111,13 +113,19 @@ public void testGcsFileInspectionReturnsInfoTypes() throws Exception { // with phone number and email address properties. @Test public void testDatastoreInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] { - "-ds", - "-kind", datastoreKind, - "-topicId", topicId, - "-subscriptionId", subscriptionId, - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] { + "-ds", + "-kind", + datastoreKind, + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); assertThat(output, containsString("EMAIL_ADDRESS")); @@ -125,14 +133,21 @@ public void testDatastoreInspectionReturnsInfoTypes() throws Exception { @Test public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] { - "-bq", - "-datasetId", "integration_tests_dlp", - "-topicId", topicId, - "-subscriptionId", subscriptionId, - "-tableId", "harmful", - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] { + "-bq", + "-datasetId", + "integration_tests_dlp", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-tableId", + "harmful", + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); assertThat(output, containsString("PHONE_NUMBER")); } diff --git a/dlp/src/test/java/com/example/dlp/JobsIT.java 
b/dlp/src/test/java/com/example/dlp/JobsIT.java index 14c336c2543..73ecff9d8f4 100644 --- a/dlp/src/test/java/com/example/dlp/JobsIT.java +++ b/dlp/src/test/java/com/example/dlp/JobsIT.java @@ -44,7 +44,6 @@ public class JobsIT { // Update to Google Cloud Storage path containing test.txt private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; - @Before public void setUp() { bout = new ByteArrayOutputStream(); @@ -59,37 +58,28 @@ public void tearDown() { bout.reset(); } - @Test public void testListJobs() throws Exception { - Jobs.main(new String[] { - "-l", - "-filter", "state=DONE" - }); + Jobs.main(new String[] {"-l", "-filter", "state=DONE"}); String output = bout.toString(); Matcher matcher = jobIdPattern.matcher(bout.toString()); - assertTrue("List must contain results.", matcher.find()); + assertTrue("List must contain results.", matcher.find()); } @Test public void testDeleteJobs() throws Exception { // Get a list of JobIds, and extract one to delete - Jobs.main(new String[] { "-l", "-filter", "state=DONE"}); + Jobs.main(new String[] {"-l", "-filter", "state=DONE"}); String jobList = bout.toString(); Matcher matcher = jobIdPattern.matcher(jobList); - assertTrue("List must contain results.", matcher.find()); + assertTrue("List must contain results.", matcher.find()); // Extract just the ID String jobId = matcher.group(0).split("/")[3]; bout.reset(); // Delete the Job - Jobs.main(new String[] { - "-d", - "-jobId", jobId - }); + Jobs.main(new String[] {"-d", "-jobId", jobId}); String output = bout.toString(); assertThat(output, containsString("Job deleted successfully.")); } - - } diff --git a/dlp/src/test/java/com/example/dlp/MetadataIT.java b/dlp/src/test/java/com/example/dlp/MetadataIT.java index fec3bfa8715..a4b968221c0 100644 --- a/dlp/src/test/java/com/example/dlp/MetadataIT.java +++ b/dlp/src/test/java/com/example/dlp/MetadataIT.java @@ -51,12 +51,12 @@ public void tearDown() { @Test public void testListInfoTypes() throws Exception { - 
Metadata.main(new String[] { - "-language", "en-US", - "-filter", "supported_by=INSPECT" - }); + Metadata.main( + new String[] { + "-language", "en-US", + "-filter", "supported_by=INSPECT" + }); String output = bout.toString(); assertTrue(output.contains("Name") && output.contains("Display name")); } - } diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java index e82dcfc6163..5708fae6211 100644 --- a/dlp/src/test/java/com/example/dlp/RedactIT.java +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -53,17 +53,15 @@ public void testRedactImage() throws Exception { // Restrict phone number, but not email Redact.main( new String[] { - "-f", "src/test/resources/test.png", - "-infoTypes", "PHONE_NUMBER", - "-o", outputFilePath + "-f", "src/test/resources/test.png", + "-infoTypes", "PHONE_NUMBER", + "-o", outputFilePath }); bout.reset(); // Verify that phone_number is missing but email is present - Inspect.main(new String[] { - "-f", outputFilePath, - "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" - }); + Inspect.main( + new String[] {"-f", outputFilePath, "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"}); String output = bout.toString(); assertThat(output, not(containsString("PHONE_NUMBER"))); assertThat(output, containsString("EMAIL_ADDRESS")); diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index c75c5279be3..0dc18a87b47 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -26,7 +26,6 @@ import java.util.regex.Pattern; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -56,11 +55,17 @@ public void setUp() { public void testNumericalStats() throws Exception { RiskAnalysis.main( new String[] { - "-n", "-datasetId", "integration_tests_dlp", - 
"-tableId", "harmful", - "-columnName", "Age", - "-topicId", topicId, - "-subscriptionId", subscriptionId + "-n", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-columnName", + "Age", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId }); String output = bout.toString(); assertThat(output, containsString("Value at ")); @@ -71,11 +76,16 @@ public void testCategoricalStats() throws Exception { RiskAnalysis.main( new String[] { "-c", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-columnName", "Mystery", - "-topicId", topicId, - "-subscriptionId", subscriptionId + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-columnName", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId }); String output = bout.toString(); @@ -85,14 +95,21 @@ public void testCategoricalStats() throws Exception { @Test public void testKAnonymity() throws Exception { - RiskAnalysis.main(new String[]{ - "-a", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-quasiIdColumnNames", "Age", "Mystery", - "-topicId", topicId, - "-subscriptionId", subscriptionId - }); + RiskAnalysis.main( + new String[] { + "-a", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-quasiIdColumnNames", + "Age", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId + }); String output = bout.toString(); assertTrue(Pattern.compile("Bucket size range: \\[\\d, \\d\\]").matcher(output).find()); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); @@ -104,12 +121,19 @@ public void testLDiversity() throws Exception { RiskAnalysis.main( new String[] { "-l", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-sensitiveAttribute", "Name", - "-quasiIdColumnNames", "Age", "Mystery", - "-topicId", topicId, - "-subscriptionId", subscriptionId + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-sensitiveAttribute", + 
"Name", + "-quasiIdColumnNames", + "Age", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId }); String output = bout.toString(); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); @@ -121,14 +145,23 @@ public void testLDiversity() throws Exception { public void testKMap() throws Exception { RiskAnalysis.main( new String[] { - "-m", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-topicId", topicId, - "-subscriptionId", subscriptionId, - "-regionCode", "US", - "-quasiIdColumnNames", "Age", "Gender", - "-infoTypes", "AGE", "GENDER" + "-m", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-regionCode", + "US", + "-quasiIdColumnNames", + "Age", + "Gender", + "-infoTypes", + "AGE", + "GENDER" }); String output = bout.toString(); diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java index 3d0ef2356a9..11c3525d12d 100644 --- a/dlp/src/test/java/com/example/dlp/TemplatesIT.java +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -23,6 +23,7 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; +import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.junit.After; @@ -30,7 +31,6 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.util.UUID; @RunWith(JUnit4.class) // CHECKSTYLE OFF: AbbreviationAsWordInName @@ -56,21 +56,23 @@ public void tearDown() { @Test public void testCreateInspectTemplate() throws Exception { - Templates.main(new String[] { - "-c", - "-displayName", String.format("test-name-%s", UUID.randomUUID()), - "-templateId", String.format("template%s", UUID.randomUUID()), - "-description", String.format("description-%s", UUID.randomUUID()) - }); + Templates.main( + new String[] { + "-c", + "-displayName", + String.format("test-name-%s", 
UUID.randomUUID()), + "-templateId", + String.format("template%s", UUID.randomUUID()), + "-description", + String.format("description-%s", UUID.randomUUID()) + }); String output = bout.toString(); assertThat(output, containsString("Template created: ")); } @Test public void testListInspectemplate() throws Exception { - Templates.main(new String[] { - "-l" - }); + Templates.main(new String[] {"-l"}); String output = bout.toString(); assertThat(output, containsString("Template name:")); } @@ -78,18 +80,14 @@ public void testListInspectemplate() throws Exception { @Test public void testDeleteInspectTemplate() throws Exception { // Extract a Template ID - Templates.main(new String[] { "-l" }); + Templates.main(new String[] {"-l"}); String output = bout.toString(); Matcher templateIds = Pattern.compile("template(\\w|\\-)+").matcher(output); assertTrue(templateIds.find()); String templateId = templateIds.group(0); bout.reset(); - Templates.main(new String[] { - "-d", - "-templateId", templateId - }); + Templates.main(new String[] {"-d", "-templateId", templateId}); output = bout.toString(); assertThat(output, containsString("Deleted template:")); } - } diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java index c3e9ef521a0..4a9b07c4063 100644 --- a/dlp/src/test/java/com/example/dlp/TriggersIT.java +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -61,42 +61,46 @@ public void tearDown() { @Test public void testCreateTrigger() throws Exception { - Triggers.main(new String[] { - "-c", - "-displayName", String.format("trigger-name-%s", UUID.randomUUID()), - "-triggerId", String.format("trigger%s", UUID.randomUUID()), - "-description", String.format("description-%s", UUID.randomUUID()), - "-bucketName", bucketName, - "-fileName", "test.txt", - "-scanPeriod", "1" - }); + Triggers.main( + new String[] { + "-c", + "-displayName", + String.format("trigger-name-%s", UUID.randomUUID()), + "-triggerId", + 
String.format("trigger%s", UUID.randomUUID()), + "-description", + String.format("description-%s", UUID.randomUUID()), + "-bucketName", + bucketName, + "-fileName", + "test.txt", + "-scanPeriod", + "1" + }); String output = bout.toString(); assertThat(output, containsString("Created Trigger:")); } @Test public void testListTrigger() throws Exception { - Triggers.main(new String[] { - "-l" - }); + Triggers.main(new String[] {"-l"}); String output = bout.toString(); assertThat(output, containsString("Trigger:")); } @Test public void testDeleteTrigger() throws Exception { - Triggers.main(new String[] { "-l" }); + Triggers.main(new String[] {"-l"}); String output = bout.toString(); Matcher templateIds = Pattern.compile("(?<=jobTriggers/)[0-9]+").matcher(output); assertTrue(templateIds.find()); String triggerId = templateIds.group(0); bout.reset(); - Triggers.main(new String[] { - "-d", - "-triggerId", triggerId, - }); + Triggers.main( + new String[] { + "-d", "-triggerId", triggerId, + }); output = bout.toString(); assertThat(output, containsString("Trigger deleted:")); } - }