docs: update docs (#916)
* fix: re-run script
* test: fix noxfile
diff --git a/docs/dyn/dlp_v2.projects.jobTriggers.html b/docs/dyn/dlp_v2.projects.jobTriggers.html
index ff14534..bcc3ecc 100644
--- a/docs/dyn/dlp_v2.projects.jobTriggers.html
+++ b/docs/dyn/dlp_v2.projects.jobTriggers.html
@@ -119,247 +119,1073 @@
An object of the form:
{ # Combines all of the information about a DLP job.
- "createTime": "A String", # Time when the job was created.
- "state": "A String", # State of a job.
- "riskDetails": { # Result of a risk analysis operation request. # Results from analyzing risk of a data source.
- "numericalStatsResult": { # Result of the numerical stats computation. # Numerical stats result
- "minValue": { # Set of primitive values supported by the system. # Minimum value appearing in the column.
- # Note that for the purposes of inspection or transformation, the number
- # of bytes considered to comprise a 'Value' is based on its representation
- # as a UTF-8 encoded string. For example, if 'integer_value' is set to
- # 123456789, the number of bytes would be counted as 9, even though an
- # int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
- "timestampValue": "A String", # timestamp
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
- # and time zone are either specified elsewhere or are not significant. The date
- # is relative to the Proleptic Gregorian Calendar. This can represent:
+ "type": "A String", # The type of job.
+ "endTime": "A String", # Time when the job finished.
+ "startTime": "A String", # Time when the job started.
+ "inspectDetails": { # The results of an inspect DataSource job. # Results from inspecting a data source.
+ "requestedOptions": { # Snapshot of the inspection configuration. # The configuration used for this job.
+ "snapshotInspectTemplate": { # The inspectTemplate contains a configuration (set of types of sensitive data # If run with an InspectTemplate, a snapshot of its state at the time of
+ # this run.
+ # to be detected) to be used anywhere you otherwise would normally specify
+ # InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates
+ # to learn more.
+ "name": "A String", # Output only. The template name.
#
- # * A full date, with non-zero year, month and day values
- # * A month and day value, with a zero year, e.g. an anniversary
- # * A year on its own, with zero month and day values
- # * A year and month value, with a zero day, e.g. a credit card expiration date
- #
- # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
- "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
- # if specifying a year by itself or a year and month where the day is not
- # significant.
- },
- "stringValue": "A String", # string
- "integerValue": "A String", # integer
- "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
- # or are specified elsewhere. An API may choose to allow leap seconds. Related
- # types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
- "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
- # to allow the value "24:00:00" for scenarios like business closing time.
- "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
- },
- },
- "maxValue": { # Set of primitive values supported by the system. # Maximum value appearing in the column.
- # Note that for the purposes of inspection or transformation, the number
- # of bytes considered to comprise a 'Value' is based on its representation
- # as a UTF-8 encoded string. For example, if 'integer_value' is set to
- # 123456789, the number of bytes would be counted as 9, even though an
- # int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
- "timestampValue": "A String", # timestamp
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
- # and time zone are either specified elsewhere or are not significant. The date
- # is relative to the Proleptic Gregorian Calendar. This can represent:
- #
- # * A full date, with non-zero year, month and day values
- # * A month and day value, with a zero year, e.g. an anniversary
- # * A year on its own, with zero month and day values
- # * A year and month value, with a zero day, e.g. a credit card expiration date
- #
- # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
- "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
- # if specifying a year by itself or a year and month where the day is not
- # significant.
- },
- "stringValue": "A String", # string
- "integerValue": "A String", # integer
- "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
- # or are specified elsewhere. An API may choose to allow leap seconds. Related
- # types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
- "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
- # to allow the value "24:00:00" for scenarios like business closing time.
- "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
- },
- },
- "quantileValues": [ # List of 99 values that partition the set of field values into 100 equal
- # sized buckets.
- { # Set of primitive values supported by the system.
- # Note that for the purposes of inspection or transformation, the number
- # of bytes considered to comprise a 'Value' is based on its representation
- # as a UTF-8 encoded string. For example, if 'integer_value' is set to
- # 123456789, the number of bytes would be counted as 9, even though an
- # int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
- "timestampValue": "A String", # timestamp
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
- # and time zone are either specified elsewhere or are not significant. The date
- # is relative to the Proleptic Gregorian Calendar. This can represent:
+ # The template will have one of the following formats:
+ # `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
+ # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`;
+ "description": "A String", # Short description (max 256 chars).
+ "displayName": "A String", # Display name (max 256 chars).
+ "inspectConfig": { # Configuration description of the scanning process. # The core content of the template. Configuration of the scanning process.
+ # When used with redactContent only info_types and min_likelihood are currently
+ # used.
+ "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
#
- # * A full date, with non-zero year, month and day values
- # * A month and day value, with a zero year, e.g. an anniversary
- # * A year on its own, with zero month and day values
- # * A year and month value, with a zero day, e.g. a credit card expiration date
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
#
- # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
- "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
- # if specifying a year by itself or a year and month where the day is not
- # significant.
- },
- "stringValue": "A String", # string
- "integerValue": "A String", # integer
- "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
- # or are specified elsewhere. An API may choose to allow leap seconds. Related
- # types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
- "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
- # to allow the value "24:00:00" for scenarios like business closing time.
- "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
- },
- },
- ],
- },
- "lDiversityResult": { # Result of the l-diversity computation. # L-divesity result
- "sensitiveValueFrequencyHistogramBuckets": [ # Histogram of l-diversity equivalence class sensitive value frequencies.
- { # Histogram of l-diversity equivalence class sensitive value frequencies.
- "sensitiveValueFrequencyLowerBound": "A String", # Lower bound on the sensitive value frequencies of the equivalence
- # classes in this bucket.
- "sensitiveValueFrequencyUpperBound": "A String", # Upper bound on the sensitive value frequencies of the equivalence
- # classes in this bucket.
- "bucketSize": "A String", # Total number of equivalence classes in this bucket.
- "bucketValueCount": "A String", # Total number of distinct equivalence classes in this bucket.
- "bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
- # classes returned per bucket is capped at 20.
- { # The set of columns' values that share the same ldiversity value.
- "quasiIdsValues": [ # Quasi-identifier values defining the k-anonymity equivalence
- # class. The order is always the same as the original request.
- { # Set of primitive values supported by the system.
- # Note that for the purposes of inspection or transformation, the number
- # of bytes considered to comprise a 'Value' is based on its representation
- # as a UTF-8 encoded string. For example, if 'integer_value' is set to
- # 123456789, the number of bytes would be counted as 9, even though an
- # int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
- "timestampValue": "A String", # timestamp
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
- # and time zone are either specified elsewhere or are not significant. The date
- # is relative to the Proleptic Gregorian Calendar. This can represent:
- #
- # * A full date, with non-zero year, month and day values
- # * A month and day value, with a zero year, e.g. an anniversary
- # * A year on its own, with zero month and day values
- # * A year and month value, with a zero day, e.g. a credit card expiration date
- #
- # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
- "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
- # if specifying a year by itself or a year and month where the day is not
- # significant.
- },
- "stringValue": "A String", # string
- "integerValue": "A String", # integer
- "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
- # or are specified elsewhere. An API may choose to allow leap seconds. Related
- # types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
- "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
- # to allow the value "24:00:00" for scenarios like business closing time.
- "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
},
},
],
- "equivalenceClassSize": "A String", # Size of the k-anonymity equivalence class.
- "numDistinctSensitiveValues": "A String", # Number of distinct sensitive values in this equivalence class.
- "topSensitiveValues": [ # Estimated frequencies of top sensitive values.
- { # A value of a field, including its frequency.
- "value": { # Set of primitive values supported by the system. # A value contained in the field in question.
- # Note that for the purposes of inspection or transformation, the number
- # of bytes considered to comprise a 'Value' is based on its representation
- # as a UTF-8 encoded string. For example, if 'integer_value' is set to
- # 123456789, the number of bytes would be counted as 9, even though an
- # int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
- "timestampValue": "A String", # timestamp
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
- # and time zone are either specified elsewhere or are not significant. The date
- # is relative to the Proleptic Gregorian Calendar. This can represent:
- #
- # * A full date, with non-zero year, month and day values
- # * A month and day value, with a zero year, e.g. an anniversary
- # * A year on its own, with zero month and day values
- # * A year and month value, with a zero day, e.g. a credit card expiration date
- #
- # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
- "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
- # if specifying a year by itself or a year and month where the day is not
- # significant.
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
+ "includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
+ # included in the response; see Finding.quote.
+ "ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
+ # Exclusion rules contained in the set are executed at the end; other
+ # rules are executed in the order they are specified for each info type.
+ { # Rule set for modifying a set of infoTypes to alter behavior under certain
+ # circumstances, depending on the specific details of the rules within the set.
+ "infoTypes": [ # List of infoTypes this rule set is applied to.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "rules": [ # Set of rules to be applied to infoTypes. The rules are applied in order.
+ { # A single inspection rule to be applied to infoTypes, specified in
+ # `InspectionRuleSet`.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
},
- "stringValue": "A String", # string
- "integerValue": "A String", # integer
- "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
- # or are specified elsewhere. An API may choose to allow leap seconds. Related
- # types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
- "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
- # to allow the value "24:00:00" for scenarios like business closing time.
- "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
},
- "count": "A String", # How many times the value is contained in the field.
+ "exclusionRule": { # The rule that specifies conditions when findings of infoTypes specified in # Exclusion rule.
+ # `InspectionRuleSet` are removed from results.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # Dictionary which defines the rule.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression which defines the rule.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "excludeInfoTypes": { # List of exclude infoTypes. # Set of infoTypes for which findings would affect this rule.
+ "infoTypes": [ # InfoType list in ExclusionRule rule drops a finding when it overlaps or
+ # is contained within a finding of an infoType from this list. For
+ # example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER" and
+ # `exclusion_rule` containing `exclude_info_types.info_types` with
+ # "EMAIL_ADDRESS" the phone number findings are dropped if they overlap
+ # with an EMAIL_ADDRESS finding.
+ # That causes "555-222-2222@example.org" to generate only a single
+ # finding, namely the email address.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ },
+ "matchingType": "A String", # How the rule is applied, see MatchingType documentation for details.
+ },
},
],
},
],
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
+ "maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
+ # When set within `InspectJobConfig`,
+ # the maximum returned is 2000 regardless if this is set higher.
+ # When set within `InspectContentRequest`, this field is ignored.
+ "maxFindingsPerInfoType": [ # Configuration of findings limit given for specified infoTypes.
+ { # Max findings configuration per infoType, per content item or long
+ # running DlpJob.
+ "maxFindings": 42, # Max findings limit for the given infoType.
+ "infoType": { # Type of information detected by the API. # Type of information the findings limit applies to. Only one limit per
+ # info_type should be provided. If InfoTypeLimit does not have an
+ # info_type, the DLP API applies the limit against all info_types that
+ # are found but not specified in another InfoTypeLimit.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ },
+ ],
+ "maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
+ # When set within `InspectContentRequest`, the maximum returned is 2000
+ # regardless if this is set higher.
+ },
+ },
+ "createTime": "A String", # Output only. The creation timestamp of an inspectTemplate.
+ "updateTime": "A String", # Output only. The last update timestamp of an inspectTemplate.
+ },
+ "jobConfig": { # Controls what and how to inspect for findings. # Inspect config.
+ "inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
+ # When used with redactContent only info_types and min_likelihood are currently
+ # used.
+ "minLikelihood": "A String", # Only returns findings equal to or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
+ "includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
+ # included in the response; see Finding.quote.
+ "ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
+ # Exclusion rules contained in the set are executed at the end; other
+ # rules are executed in the order they are specified for each info type.
+ { # Rule set for modifying a set of infoTypes to alter behavior under certain
+ # circumstances, depending on the specific details of the rules within the set.
+ "infoTypes": [ # List of infoTypes this rule set is applied to.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "rules": [ # Set of rules to be applied to infoTypes. The rules are applied in order.
+ { # A single inspection rule to be applied to infoTypes, specified in
+ # `InspectionRuleSet`.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ "exclusionRule": { # The rule that specifies conditions when findings of infoTypes specified in # Exclusion rule.
+ # `InspectionRuleSet` are removed from results.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # Dictionary which defines the rule.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression which defines the rule.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "excludeInfoTypes": { # List of exclude infoTypes. # Set of infoTypes for which findings would affect this rule.
+ "infoTypes": [ # InfoType list in ExclusionRule rule drops a finding when it overlaps or
+ # is contained within a finding of an infoType from this list. For
+ # example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER" and
+ # `exclusion_rule` containing `exclude_info_types.info_types` with
+ # "EMAIL_ADDRESS", the phone number findings are dropped if they overlap
+ # with an EMAIL_ADDRESS finding.
+ # As a result, "555-222-2222@example.org" generates only a single
+ # finding, namely the email address.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ },
+ "matchingType": "A String", # How the rule is applied, see MatchingType documentation for details.
+ },
+ },
+ ],
+ },
+ ],
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
+ "maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
+ # When set within `InspectJobConfig`,
+ # the maximum returned is 2000 regardless if this is set higher.
+ # When set within `InspectContentRequest`, this field is ignored.
+ "maxFindingsPerInfoType": [ # Configuration of findings limit given for specified infoTypes.
+ { # Max findings configuration per infoType, per content item or long
+ # running DlpJob.
+ "maxFindings": 42, # Max findings limit for the given infoType.
+ "infoType": { # Type of information detected by the API. # Type of information the findings limit applies to. Only one limit per
+ # info_type should be provided. If InfoTypeLimit does not have an
+ # info_type, the DLP API applies the limit against all info_types that
+ # are found but not specified in another InfoTypeLimit.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ },
+ ],
+ "maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
+ # When set within `InspectContentRequest`, the maximum returned is 2000
+ # regardless if this is set higher.
+ },
+ },
+ "actions": [ # Actions to execute at the completion of the job.
+ { # A task to execute on the completion of a job.
+ # See https://cloud.google.com/dlp/docs/concepts-actions to learn more.
+ "publishSummaryToCscc": { # Publish the result summary of a DlpJob to the Cloud Security # Publish summary to Cloud Security Command Center (Alpha).
+ # Command Center (CSCC Alpha).
+ # This action is only available for projects which are parts of
+ # an organization and whitelisted for the alpha Cloud Security Command
+ # Center.
+ # The action will publish count of finding instances and their info types.
+ # The summary of findings will be persisted in CSCC and are governed by CSCC
+ # service-specific policy, see https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified.
+ # Compatible with: Inspect
+ },
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
+ # completion/failure.
+ # completion/failure.
+ },
+ "saveFindings": { # If set, the detailed findings will be persisted to the specified # Save resulting findings in a provided location.
+ # OutputStorageConfig. Only a single instance of this action can be
+ # specified.
+ # Compatible with: Inspect, Risk
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
+ # dataset. If table_id is not set a new one will be generated
+ # for you with the following format:
+ # dlp_googleapis_yyyy_mm_dd_[dlp_job_id]. Pacific timezone will be used for
+ # generating the date details.
+ #
+ # For Inspect, each column in an existing output table must have the same
+ # name, type, and mode of a field in the `Finding` object.
+ #
+ # For Risk, an existing output table should be the output of a previous
+ # Risk analysis job run on the same source table, with the same privacy
+ # metric and quasi-identifiers. Risk jobs that analyze the same table but
+ # compute a different privacy metric, or use different sets of
+ # quasi-identifiers, cannot store their results in the same table.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
+ },
+ },
+ "pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
+ # message contains a single field, `DlpJobName`, which is equal to the
+ # finished job's
+ # [`DlpJob.name`](/dlp/docs/reference/rest/v2/projects.dlpJobs#DlpJob).
+ # Compatible with: Inspect, Risk
+ "topic": "A String", # Cloud Pub/Sub topic to send notifications to. The topic must have given
+ # publishing access rights to the DLP API service account executing
+ # the long running DlpJob sending the notifications.
+ # Format is projects/{project}/topics/{topic}.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Datahub.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver for each infoType requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ },
+ ],
+ "storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
+ "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ # of Google Cloud Platform.
+ "labels": { # To organize findings, these labels will be added to each finding.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # Label values must be between 0 and 63 characters long and must conform
+ # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
+ #
+ # No more than 10 labels can be associated with a given finding.
+ #
+ # Examples:
+ # * `"environment" : "production"`
+ # * `"pipeline" : "etl"`
+ "a_key": "A String",
+ },
+ "description": "A String", # A short description of where the data is coming from. Will be stored once
+ # in the job. 256 max length.
+ "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
+ # meaningful such as the columns that are primary keys.
+ "identifyingFields": [ # The columns that are the primary keys for table objects included in
+ # ContentItem. A copy of this cell's value will be stored alongside
+ # each finding so that the finding can be traced to the specific row it came
+ # from. No more than 3 may be provided.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ },
+ "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
+ # 'finding_labels' map. Request may contain others, but any missing one of
+ # these will be rejected.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # No more than 10 keys can be required.
+ "A String",
+ ],
+ },
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
+ },
+ "inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
+ # `inspect_config` will be merged into the values persisted as part of the
+ # template.
+ },
+ },
+ "result": { # All result fields mentioned below are updated while the job is processing. # A summary of the outcome of this inspect job.
+ "processedBytes": "A String", # Total size in bytes that were processed.
+ "hybridStats": { # Statistics related to processing hybrid inspect requests. # Statistics related to the processing of hybrid inspect.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ "pendingCount": "A String", # The number of hybrid requests currently being processed. Only populated
+ # when called via method `getDlpJob`.
+ # A burst of traffic may cause hybrid inspect requests to be enqueued.
+ # Processing will take place as quickly as possible, but resource limitations
+ # may impact how long a request is enqueued for.
+ "processedCount": "A String", # The number of hybrid inspection requests processed within this job.
+ "abortedCount": "A String", # The number of hybrid inspection requests aborted because the job ran
+ # out of quota or was ended before they could be processed.
+ },
+ "infoTypeStats": [ # Statistics of how many instances of each info type were found during
+ # inspect job.
+ { # Statistics regarding a specific InfoType.
+ "infoType": { # Type of information detected by the API. # The type of finding this stat is for.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "count": "A String", # Number of findings for this infoType.
},
],
+ "totalEstimatedBytes": "A String", # Estimate of the number of bytes to process.
},
+ },
+ "createTime": "A String", # Time when the job was created.
+ "state": "A String", # State of a job.
+ "jobTriggerName": "A String", # If created by a job trigger, the resource name of the trigger that
+ # instantiated the job.
+ "riskDetails": { # Result of a risk analysis operation request. # Results from analyzing risk of a data source.
"requestedPrivacyMetric": { # Privacy metric to compute for reidentification risk analysis. # Privacy metric to compute.
+ "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
+ # figure out that one given individual appears in a de-identified dataset.
+ # Similarly to the k-map metric, we cannot compute δ-presence exactly without
+ # knowing the attack dataset, so we use a statistical model instead.
+ "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
+ # Set if no column is tagged with a region-specific InfoType (like
+ # US_ZIP_5) or a region code.
+ "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
+ # used to tag a quasi-identifiers field must appear in exactly one
+ # field of one auxiliary table.
+ { # An auxiliary table containing statistical information on the relative
+ # frequency of different quasi-identifiers values. It has one or several
+ # quasi-identifiers columns, and one column that indicates the relative
+ # frequency of each quasi-identifier tuple.
+ # If a tuple is present in the data but not in the auxiliary table, the
+ # corresponding relative frequency is assumed to be zero (and thus, the
+ # tuple is highly reidentifiable).
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ ],
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ ],
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
+ # the distribution of values in the input data
+ # empty messages in your APIs. A typical example is to use it as the request
+ # or the response type of an API method. For instance:
+ #
+ # service Foo {
+ # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+ # }
+ #
+ # The JSON representation for `Empty` is empty JSON object `{}`.
+ },
+ },
+ ],
+ },
"categoricalStatsConfig": { # Compute numerical stats over an individual column, including # Categorical stats
# number of distinct values and value count distribution.
"field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
@@ -370,15 +1196,6 @@
},
},
"kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk. # K-anonymity
- "quasiIds": [ # Set of fields to compute k-anonymity over. When multiple fields are
- # specified, they are considered a single composite key. Structs and
- # repeated data types are not supported; however, nested fields are
- # supported so long as they are not structs themselves or nested within
- # a repeated field.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
"entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Message indicating that multiple rows might be associated to a
# single individual. If the same entity_id is associated to multiple
# quasi-identifier tuples over distinct rows, we consider the entire
@@ -397,6 +1214,15 @@
"name": "A String", # Name describing the field.
},
},
+ "quasiIds": [ # Set of fields to compute k-anonymity over. When multiple fields are
+ # specified, they are considered a single composite key. Structs and
+ # repeated data types are not supported; however, nested fields are
+ # supported so long as they are not structs themselves or nested within
+ # a repeated field.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
},
"numericalStatsConfig": { # Compute numerical stats over an individual column, including # Numerical stats
# min, max, and quantiles.
@@ -452,20 +1278,6 @@
"quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two columns can have the
# same tag.
{ # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
- "name": "A String", # Name describing the field.
- },
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
"customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
# indicate an auxiliary table that contains statistical information on
# the possible values of this column (below).
@@ -480,10 +1292,27 @@
#
# The JSON representation for `Empty` is empty JSON object `{}`.
},
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
},
],
},
"lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk. # l-diversity
+ "sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
+ "name": "A String", # Name describing the field.
+ },
"quasiIds": [ # Set of quasi-identifiers indicating how equivalence classes are
# defined for the l-diversity computation. When multiple fields are
# specified, they are considered a single composite key.
@@ -491,104 +1320,25 @@
"name": "A String", # Name describing the field.
},
],
- "sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
- "name": "A String", # Name describing the field.
- },
- },
- "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
- # figure out that one given individual appears in a de-identified dataset.
- # Similarly to the k-map metric, we cannot compute δ-presence exactly without
- # knowing the attack dataset, so we use a statistical model instead.
- "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
- # same tag.
- { # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
- "name": "A String", # Name describing the field.
- },
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
- # the distribution of values in the input data
- # empty messages in your APIs. A typical example is to use it as the request
- # or the response type of an API method. For instance:
- #
- # service Foo {
- # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
- # }
- #
- # The JSON representation for `Empty` is empty JSON object `{}`.
- },
- },
- ],
- "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Set if no column is tagged with a region-specific InfoType (like
- # US_ZIP_5) or a region code.
- "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
- # used to tag a quasi-identifiers field must appear in exactly one
- # field of one auxiliary table.
- { # An auxiliary table containing statistical information on the relative
- # frequency of different quasi-identifiers values. It has one or several
- # quasi-identifiers columns, and one column that indicates the relative
- # frequency of each quasi-identifier tuple.
- # If a tuple is present in the data but not in the auxiliary table, the
- # corresponding relative frequency is assumed to be zero (and thus, the
- # tuple is highly reidentifiable).
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Required. Quasi-identifier columns.
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service. # Identifies the column.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- },
- ],
- },
- ],
},
},
"categoricalStatsResult": { # Result of the categorical stats computation. # Categorical stats result
"valueFrequencyHistogramBuckets": [ # Histogram of value frequencies in the column.
{ # Histogram of value frequencies in the column.
+ "valueFrequencyLowerBound": "A String", # Lower bound on the value frequency of the values in this bucket.
+ "valueFrequencyUpperBound": "A String", # Upper bound on the value frequency of the values in this bucket.
+ "bucketSize": "A String", # Total number of values in this bucket.
+ "bucketValueCount": "A String", # Total number of distinct values in this bucket.
"bucketValues": [ # Sample of value frequencies in this bucket. The total number of
# values returned per bucket is capped at 20.
{ # A value of a field, including its frequency.
+ "count": "A String", # How many times the value is contained in the field.
"value": { # Set of primitive values supported by the system. # A value contained in the field in question.
# Note that for the purposes of inspection or transformation, the number
# of bytes considered to comprise a 'Value' is based on its representation
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
"timestampValue": "A String", # timestamp
"dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
@@ -613,21 +1363,19 @@
"timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
# to allow the value "24:00:00" for scenarios like business closing time.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
},
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
},
- "count": "A String", # How many times the value is contained in the field.
},
],
- "valueFrequencyLowerBound": "A String", # Lower bound on the value frequency of the values in this bucket.
- "valueFrequencyUpperBound": "A String", # Upper bound on the value frequency of the values in this bucket.
- "bucketSize": "A String", # Total number of values in this bucket.
- "bucketValueCount": "A String", # Total number of distinct values in this bucket.
},
],
},
@@ -649,14 +1397,10 @@
# to 3, 4 or 5 people in the overlying population. An important particular
# case is when min_anonymity = max_anonymity = 1: the frequency field then
# corresponds to the number of uniquely identifiable records.
- "bucketSize": "A String", # Number of records within these anonymity bounds.
- "bucketValueCount": "A String", # Total number of distinct quasi-identifier tuple values in this bucket.
- "maxAnonymity": "A String", # Always greater than or equal to min_anonymity.
"minAnonymity": "A String", # Always positive.
"bucketValues": [ # Sample of quasi-identifier tuple values in this bucket. The total
# number of classes returned per bucket is capped at 20.
{ # A tuple of values for the quasi-identifier columns.
- "estimatedAnonymity": "A String", # The estimated anonymity for these quasi-identifier values.
"quasiIdsValues": [ # The quasi-identifier values.
{ # Set of primitive values supported by the system.
# Note that for the purposes of inspection or transformation, the number
@@ -664,9 +1408,6 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
"timestampValue": "A String", # timestamp
"dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
@@ -691,25 +1432,30 @@
"timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
# to allow the value "24:00:00" for scenarios like business closing time.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
},
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
},
],
+ "estimatedAnonymity": "A String", # The estimated anonymity for these quasi-identifier values.
},
],
+ "bucketSize": "A String", # Number of records within these anonymity bounds.
+ "bucketValueCount": "A String", # Total number of distinct quasi-identifier tuple values in this bucket.
+ "maxAnonymity": "A String", # Always greater than or equal to min_anonymity.
},
],
},
"kAnonymityResult": { # Result of the k-anonymity computation. # K-anonymity result
"equivalenceClassHistogramBuckets": [ # Histogram of k-anonymity equivalence classes.
{ # Histogram of k-anonymity equivalence classes.
- "bucketSize": "A String", # Total number of equivalence classes in this bucket.
- "bucketValueCount": "A String", # Total number of distinct equivalence classes in this bucket.
"equivalenceClassSizeLowerBound": "A String", # Lower bound on the size of the equivalence classes in this bucket.
"equivalenceClassSizeUpperBound": "A String", # Upper bound on the size of the equivalence classes in this bucket.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
@@ -724,9 +1470,6 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
"timestampValue": "A String", # timestamp
"dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
@@ -751,19 +1494,24 @@
"timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
# to allow the value "24:00:00" for scenarios like business closing time.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
},
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
},
],
"equivalenceClassSize": "A String", # Size of the equivalence class, for example number of rows with the
# above set of values.
},
],
+ "bucketSize": "A String", # Total number of equivalence classes in this bucket.
+ "bucketValueCount": "A String", # Total number of distinct equivalence classes in this bucket.
},
],
},
@@ -786,6 +1534,10 @@
# important particular case is when min_probability = max_probability = 1:
# then, every individual who shares this quasi-identifier combination is in
# the dataset.
+ "minProbability": 3.14, # Between 0 and 1.
+ "maxProbability": 3.14, # Always greater than or equal to min_probability.
+ "bucketSize": "A String", # Number of records within these probability bounds.
+ "bucketValueCount": "A String", # Total number of distinct quasi-identifier tuple values in this bucket.
"bucketValues": [ # Sample of quasi-identifier tuple values in this bucket. The total
# number of classes returned per bucket is capped at 20.
{ # A tuple of values for the quasi-identifier columns.
@@ -796,9 +1548,6 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "booleanValue": True or False, # boolean
- "floatValue": 3.14, # float
- "dayOfWeekValue": "A String", # day of week
"timestampValue": "A String", # timestamp
"dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
@@ -823,13 +1572,16 @@
"timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
- "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
- # allow the value 60 if it allows leap-seconds.
- "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
# to allow the value "24:00:00" for scenarios like business closing time.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
},
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
},
],
"estimatedProbability": 3.14, # The estimated probability that a given individual sharing these
@@ -842,10 +1594,6 @@
# population with these values, then δ is 0.15.
},
],
- "minProbability": 3.14, # Between 0 and 1.
- "maxProbability": 3.14, # Always greater than or equal to min_probability.
- "bucketSize": "A String", # Number of records within these probability bounds.
- "bucketValueCount": "A String", # Total number of distinct quasi-identifier tuple values in this bucket.
},
],
},
@@ -859,9 +1607,245 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "numericalStatsResult": { # Result of the numerical stats computation. # Numerical stats result
+ "minValue": { # Set of primitive values supported by the system. # Minimum value appearing in the column.
+ # Note that for the purposes of inspection or transformation, the number
+ # of bytes considered to comprise a 'Value' is based on its representation
+ # as a UTF-8 encoded string. For example, if 'integer_value' is set to
+ # 123456789, the number of bytes would be counted as 9, even though an
+ # int64 only holds up to 8 bytes of data.
+ "timestampValue": "A String", # timestamp
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
+ # and time zone are either specified elsewhere or are not significant. The date
+ # is relative to the Proleptic Gregorian Calendar. This can represent:
+ #
+ # * A full date, with non-zero year, month and day values
+ # * A month and day value, with a zero year, e.g. an anniversary
+ # * A year on its own, with zero month and day values
+ # * A year and month value, with a zero day, e.g. a credit card expiration date
+ #
+ # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
+ "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
+ # if specifying a year by itself or a year and month where the day is not
+ # significant.
+ },
+ "stringValue": "A String", # string
+ "integerValue": "A String", # integer
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
+ # or are specified elsewhere. An API may choose to allow leap seconds. Related
+ # types are google.type.Date and `google.protobuf.Timestamp`.
+ "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
+ # to allow the value "24:00:00" for scenarios like business closing time.
+ "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
+ },
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
+ },
+ "maxValue": { # Set of primitive values supported by the system. # Maximum value appearing in the column.
+ # Note that for the purposes of inspection or transformation, the number
+ # of bytes considered to comprise a 'Value' is based on its representation
+ # as a UTF-8 encoded string. For example, if 'integer_value' is set to
+ # 123456789, the number of bytes would be counted as 9, even though an
+ # int64 only holds up to 8 bytes of data.
+ "timestampValue": "A String", # timestamp
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
+ # and time zone are either specified elsewhere or are not significant. The date
+ # is relative to the Proleptic Gregorian Calendar. This can represent:
+ #
+ # * A full date, with non-zero year, month and day values
+ # * A month and day value, with a zero year, e.g. an anniversary
+ # * A year on its own, with zero month and day values
+ # * A year and month value, with a zero day, e.g. a credit card expiration date
+ #
+ # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
+ "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
+ # if specifying a year by itself or a year and month where the day is not
+ # significant.
+ },
+ "stringValue": "A String", # string
+ "integerValue": "A String", # integer
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
+ # or are specified elsewhere. An API may choose to allow leap seconds. Related
+ # types are google.type.Date and `google.protobuf.Timestamp`.
+ "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
+ # to allow the value "24:00:00" for scenarios like business closing time.
+ "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
+ },
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
+ },
+ "quantileValues": [ # List of 99 values that partition the set of field values into 100 equal
+ # sized buckets.
+ { # Set of primitive values supported by the system.
+ # Note that for the purposes of inspection or transformation, the number
+ # of bytes considered to comprise a 'Value' is based on its representation
+ # as a UTF-8 encoded string. For example, if 'integer_value' is set to
+ # 123456789, the number of bytes would be counted as 9, even though an
+ # int64 only holds up to 8 bytes of data.
+ "timestampValue": "A String", # timestamp
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
+ # and time zone are either specified elsewhere or are not significant. The date
+ # is relative to the Proleptic Gregorian Calendar. This can represent:
+ #
+ # * A full date, with non-zero year, month and day values
+ # * A month and day value, with a zero year, e.g. an anniversary
+ # * A year on its own, with zero month and day values
+ # * A year and month value, with a zero day, e.g. a credit card expiration date
+ #
+ # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
+ "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
+ # if specifying a year by itself or a year and month where the day is not
+ # significant.
+ },
+ "stringValue": "A String", # string
+ "integerValue": "A String", # integer
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
+ # or are specified elsewhere. An API may choose to allow leap seconds. Related
+ # types are google.type.Date and `google.protobuf.Timestamp`.
+ "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
+ # to allow the value "24:00:00" for scenarios like business closing time.
+ "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
+ },
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
+ },
+ ],
+ },
+ "lDiversityResult": { # Result of the l-diversity computation. # L-diversity result
+ "sensitiveValueFrequencyHistogramBuckets": [ # Histogram of l-diversity equivalence class sensitive value frequencies.
+ { # Histogram of l-diversity equivalence class sensitive value frequencies.
+ "bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
+ # classes returned per bucket is capped at 20.
+ { # The set of columns' values that share the same ldiversity value.
+ "numDistinctSensitiveValues": "A String", # Number of distinct sensitive values in this equivalence class.
+ "topSensitiveValues": [ # Estimated frequencies of top sensitive values.
+ { # A value of a field, including its frequency.
+ "count": "A String", # How many times the value is contained in the field.
+ "value": { # Set of primitive values supported by the system. # A value contained in the field in question.
+ # Note that for the purposes of inspection or transformation, the number
+ # of bytes considered to comprise a 'Value' is based on its representation
+ # as a UTF-8 encoded string. For example, if 'integer_value' is set to
+ # 123456789, the number of bytes would be counted as 9, even though an
+ # int64 only holds up to 8 bytes of data.
+ "timestampValue": "A String", # timestamp
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
+ # and time zone are either specified elsewhere or are not significant. The date
+ # is relative to the Proleptic Gregorian Calendar. This can represent:
+ #
+ # * A full date, with non-zero year, month and day values
+ # * A month and day value, with a zero year, e.g. an anniversary
+ # * A year on its own, with zero month and day values
+ # * A year and month value, with a zero day, e.g. a credit card expiration date
+ #
+ # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
+ "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
+ # if specifying a year by itself or a year and month where the day is not
+ # significant.
+ },
+ "stringValue": "A String", # string
+ "integerValue": "A String", # integer
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
+ # or are specified elsewhere. An API may choose to allow leap seconds. Related
+ # types are google.type.Date and `google.protobuf.Timestamp`.
+ "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
+ # to allow the value "24:00:00" for scenarios like business closing time.
+ "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
+ },
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
+ },
+ },
+ ],
+ "quasiIdsValues": [ # Quasi-identifier values defining the k-anonymity equivalence
+ # class. The order is always the same as the original request.
+ { # Set of primitive values supported by the system.
+ # Note that for the purposes of inspection or transformation, the number
+ # of bytes considered to comprise a 'Value' is based on its representation
+ # as a UTF-8 encoded string. For example, if 'integer_value' is set to
+ # 123456789, the number of bytes would be counted as 9, even though an
+ # int64 only holds up to 8 bytes of data.
+ "timestampValue": "A String", # timestamp
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
+ # and time zone are either specified elsewhere or are not significant. The date
+ # is relative to the Proleptic Gregorian Calendar. This can represent:
+ #
+ # * A full date, with non-zero year, month and day values
+ # * A month and day value, with a zero year, e.g. an anniversary
+ # * A year on its own, with zero month and day values
+ # * A year and month value, with a zero day, e.g. a credit card expiration date
+ #
+ # Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
+ "day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
+ # if specifying a year by itself or a year and month where the day is not
+ # significant.
+ },
+ "stringValue": "A String", # string
+ "integerValue": "A String", # integer
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
+ # or are specified elsewhere. An API may choose to allow leap seconds. Related
+ # types are google.type.Date and `google.protobuf.Timestamp`.
+ "hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
+ # to allow the value "24:00:00" for scenarios like business closing time.
+ "minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
+ "seconds": 42, # Seconds of minutes of the time. Must normally be from 0 to 59. An API may
+ # allow the value 60 if it allows leap-seconds.
+ "nanos": 42, # Fractions of seconds in nanoseconds. Must be from 0 to 999,999,999.
+ },
+ "booleanValue": True or False, # boolean
+ "floatValue": 3.14, # float
+ "dayOfWeekValue": "A String", # day of week
+ },
+ ],
+ "equivalenceClassSize": "A String", # Size of the k-anonymity equivalence class.
+ },
+ ],
+ "sensitiveValueFrequencyLowerBound": "A String", # Lower bound on the sensitive value frequencies of the equivalence
+ # classes in this bucket.
+ "sensitiveValueFrequencyUpperBound": "A String", # Upper bound on the sensitive value frequencies of the equivalence
+ # classes in this bucket.
+ "bucketSize": "A String", # Total number of equivalence classes in this bucket.
+ "bucketValueCount": "A String", # Total number of distinct equivalence classes in this bucket.
+ },
+ ],
+ },
},
- "jobTriggerName": "A String", # If created by a job trigger, the resource name of the trigger that
- # instantiated the job.
+ "name": "A String", # The server-assigned name.
"errors": [ # A stream of errors encountered running the job.
{ # Details information about an error encountered during job execution or
# the results of an unsuccessful activation of the JobTrigger.
@@ -872,7 +1856,6 @@
#
# You can find out more about this error model and how to work with it in the
# [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
"message": "A String", # A developer-facing error message, which should be in English. Any
# user-facing error message should be localized and sent in the
# google.rpc.Status.details field, or localized by the client.
@@ -882,996 +1865,13 @@
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
},
"timestamps": [ # The times the error occurred.
"A String",
],
},
],
- "name": "A String", # The server-assigned name.
- "type": "A String", # The type of job.
- "endTime": "A String", # Time when the job finished.
- "startTime": "A String", # Time when the job started.
- "inspectDetails": { # The results of an inspect DataSource job. # Results from inspecting a data source.
- "requestedOptions": { # Snapshot of the inspection configuration. # The configuration used for this job.
- "snapshotInspectTemplate": { # The inspectTemplate contains a configuration (set of types of sensitive data # If run with an InspectTemplate, a snapshot of its state at the time of
- # this run.
- # to be detected) to be used anywhere you otherwise would normally specify
- # InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates
- # to learn more.
- "displayName": "A String", # Display name (max 256 chars).
- "inspectConfig": { # Configuration description of the scanning process. # The core content of the template. Configuration of the scanning process.
- # When used with redactContent only info_types and min_likelihood are currently
- # used.
- "includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
- # included in the response; see Finding.quote.
- "ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
- # Exclusion rules, contained in the set are executed in the end, other
- # rules are executed in the order they are specified for each info type.
- { # Rule set for modifying a set of infoTypes to alter behavior under certain
- # circumstances, depending on the specific details of the rules within the set.
- "infoTypes": [ # List of infoTypes this rule set is applied to.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "rules": [ # Set of rules to be applied to infoTypes. The rules are applied in order.
- { # A single inspection rule to be applied to infoTypes, specified in
- # `InspectionRuleSet`.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- "exclusionRule": { # The rule that specifies conditions when findings of infoTypes specified in # Exclusion rule.
- # `InspectionRuleSet` are removed from results.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # Dictionary which defines the rule.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "regex": { # Message defining a custom regular expression. # Regular expression which defines the rule.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "excludeInfoTypes": { # List of exclude infoTypes. # Set of infoTypes for which findings would affect this rule.
- "infoTypes": [ # InfoType list in ExclusionRule rule drops a finding when it overlaps or
- # contained within with a finding of an infoType from this list. For
- # example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER"` and
- # `exclusion_rule` containing `exclude_info_types.info_types` with
- # "EMAIL_ADDRESS" the phone number findings are dropped if they overlap
- # with EMAIL_ADDRESS finding.
- # That leads to "555-222-2222@example.org" to generate only a single
- # finding, namely email address.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- },
- "matchingType": "A String", # How the rule is applied, see MatchingType documentation for details.
- },
- },
- ],
- },
- ],
- "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
- "maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectJobConfig`,
- # the maximum returned is 2000 regardless if this is set higher.
- # When set within `InspectContentRequest`, this field is ignored.
- "maxFindingsPerInfoType": [ # Configuration of findings limit given for specified infoTypes.
- { # Max findings configuration per infoType, per content item or long
- # running DlpJob.
- "maxFindings": 42, # Max findings limit for the given infoType.
- "infoType": { # Type of information detected by the API. # Type of information the findings limit applies to. Only one limit per
- # info_type should be provided. If InfoTypeLimit does not have an
- # info_type, the DLP API applies the limit against all info_types that
- # are found but not specified in another InfoTypeLimit.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- },
- ],
- "maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
- # When set within `InspectContentRequest`, the maximum returned is 2000
- # regardless if this is set higher.
- },
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection rule. # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing, such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
- },
- "createTime": "A String", # Output only. The creation timestamp of an inspectTemplate.
- "updateTime": "A String", # Output only. The last update timestamp of an inspectTemplate.
- "name": "A String", # Output only. The template name.
- #
- # The template will have one of the following formats:
- # `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
- # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`;
- "description": "A String", # Short description (max 256 chars).
- },
- "jobConfig": { # Controls what and how to inspect for findings. # Inspect config.
- "inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
- # When used with redactContent only info_types and min_likelihood are currently
- # used.
- "includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
- # included in the response; see Finding.quote.
- "ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
- # Exclusion rules contained in the set are executed at the end; other
- # rules are executed in the order they are specified for each info type.
- { # Rule set for modifying a set of infoTypes to alter behavior under certain
- # circumstances, depending on the specific details of the rules within the set.
- "infoTypes": [ # List of infoTypes this rule set is applied to.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "rules": [ # Set of rules to be applied to infoTypes. The rules are applied in order.
- { # A single inspection rule to be applied to infoTypes, specified in
- # `InspectionRuleSet`.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection rule. # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- "exclusionRule": { # The rule that specifies conditions when findings of infoTypes specified in # Exclusion rule.
- # `InspectionRuleSet` are removed from results.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # Dictionary which defines the rule.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "regex": { # Message defining a custom regular expression. # Regular expression which defines the rule.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "excludeInfoTypes": { # List of exclude infoTypes. # Set of infoTypes for which findings would affect this rule.
- "infoTypes": [ # The ExclusionRule drops a finding when it overlaps with or is
- # contained within a finding of an infoType from this list. For
- # example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER" and
- # `exclusion_rule` containing `exclude_info_types.info_types` with
- # "EMAIL_ADDRESS", the phone number findings are dropped if they overlap
- # with an EMAIL_ADDRESS finding.
- # As a result, "555-222-2222@example.org" generates only a single
- # finding, namely the email address.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- },
- "matchingType": "A String", # How the rule is applied, see MatchingType documentation for details.
- },
- },
- ],
- },
- ],
- "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
- "maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectJobConfig`,
- # the maximum returned is 2000 regardless if this is set higher.
- # When set within `InspectContentRequest`, this field is ignored.
- "maxFindingsPerInfoType": [ # Configuration of findings limit given for specified infoTypes.
- { # Max findings configuration per infoType, per content item or long
- # running DlpJob.
- "maxFindings": 42, # Max findings limit for the given infoType.
- "infoType": { # Type of information detected by the API. # Type of information the findings limit applies to. Only one limit per
- # info_type should be provided. If InfoTypeLimit does not have an
- # info_type, the DLP API applies the limit against all info_types that
- # are found but not specified in another InfoTypeLimit.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- },
- ],
- "maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
- # When set within `InspectContentRequest`, the maximum returned is 2000
- # regardless if this is set higher.
- },
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection rule. # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing, such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
- },
- "actions": [ # Actions to execute at the completion of the job.
- { # A task to execute on the completion of a job.
- # See https://cloud.google.com/dlp/docs/concepts-actions to learn more.
- "publishSummaryToCscc": { # Publish the result summary of a DlpJob to the Cloud Security # Publish summary to Cloud Security Command Center (Alpha).
- # Command Center (CSCC Alpha).
- # This action is only available for projects which are parts of
- # an organization and whitelisted for the alpha Cloud Security Command
- # Center.
- # The action will publish count of finding instances and their info types.
- # The summary of findings will be persisted in CSCC and are governed by CSCC
- # service-specific policy, see https://cloud.google.com/terms/service-terms
- # Only a single instance of this action can be specified.
- # Compatible with: Inspect
- },
- "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
- # completion/failure.
- # completion/failure.
- },
- "saveFindings": { # If set, the detailed findings will be persisted to the specified # Save resulting findings in a provided location.
- # OutputStorageConfig. Only a single instance of this action can be
- # specified.
- # Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
- # dataset. If table_id is not set a new one will be generated
- # for you with the following format:
- # dlp_googleapis_yyyy_mm_dd_[dlp_job_id]. Pacific timezone will be used for
- # generating the date details.
- #
- # For Inspect, each column in an existing output table must have the same
- # name, type, and mode of a field in the `Finding` object.
- #
- # For Risk, an existing output table should be the output of a previous
- # Risk analysis job run on the same source table, with the same privacy
- # metric and quasi-identifiers. Risk jobs that analyze the same table but
- # compute a different privacy metric, or use different sets of
- # quasi-identifiers, cannot store their results in the same table.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- },
- },
- "pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
- # message contains a single field, `DlpJobName`, which is equal to the
- # finished job's
- # [`DlpJob.name`](/dlp/docs/reference/rest/v2/projects.dlpJobs#DlpJob).
- # Compatible with: Inspect, Risk
- "topic": "A String", # Cloud Pub/Sub topic to send notifications to. The topic must have given
- # publishing access rights to the DLP API service account executing
- # the long running DlpJob sending the notifications.
- # Format is projects/{project}/topics/{topic}.
- },
- "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Datahub.
- # results of the DlpJob will be applied to the entry for the resource scanned
- # in Cloud Data Catalog. Any labels previously written by another DlpJob will
- # be deleted. InfoType naming patterns are strictly enforced when using this
- # feature. Note that the findings will be persisted in Cloud Data Catalog
- # storage and are governed by Data Catalog service-specific policy, see
- # https://cloud.google.com/terms/service-terms
- # Only a single instance of this action can be specified and only allowed if
- # all resources being scanned are BigQuery tables.
- # Compatible with: Inspect
- },
- "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
- # will publish a metric to Stackdriver on each infotype requested and
- # how many findings were found for it. CustomDetectors will be bucketed
- # as 'Custom' under the Stackdriver label 'info_type'.
- },
- },
- ],
- "storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 mean no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 mean no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
- "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
- # Early access feature is in a pre-release state and might change or have
- # limited support. For more information, see
- # https://cloud.google.com/products#product-launch-stages.
- # of Google Cloud Platform.
- "labels": { # To organize findings, these labels will be added to each finding.
- #
- # Label keys must be between 1 and 63 characters long and must conform
- # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
- #
- # Label values must be between 0 and 63 characters long and must conform
- # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
- #
- # No more than 10 labels can be associated with a given finding.
- #
- # Examples:
- # * `"environment" : "production"`
- # * `"pipeline" : "etl"`
- "a_key": "A String",
- },
- "description": "A String", # A short description of where the data is coming from. Will be stored once
- # in the job. 256 max length.
- "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
- # meaningful such as the columns that are primary keys.
- "identifyingFields": [ # The columns that are the primary keys for table objects included in
- # ContentItem. A copy of this cell's value will stored alongside alongside
- # each finding so that the finding can be traced to the specific row it came
- # from. No more than 3 may be provided.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
- "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
- # 'finding_labels' map. Request may contain others, but any missing one of
- # these will be rejected.
- #
- # Label keys must be between 1 and 63 characters long and must conform
- # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
- #
- # No more than 10 keys can be required.
- "A String",
- ],
- },
- },
- "inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
- # `inspect_config` will be merged into the values persisted as part of the
- # template.
- },
- },
- "result": { # All result fields mentioned below are updated while the job is processing. # A summary of the outcome of this inspect job.
- "hybridStats": { # Statistics related to processing hybrid inspect requests. # Statistics related to the processing of hybrid inspect.
- # Early access feature is in a pre-release state and might change or have
- # limited support. For more information, see
- # https://cloud.google.com/products#product-launch-stages.
- "abortedCount": "A String", # The number of hybrid inspection requests aborted because the job ran
- # out of quota or was ended before they could be processed.
- "pendingCount": "A String", # The number of hybrid requests currently being processed. Only populated
- # when called via method `getDlpJob`.
- # A burst of traffic may cause hybrid inspect requests to be enqueued.
- # Processing will take place as quickly as possible, but resource limitations
- # may impact how long a request is enqueued for.
- "processedCount": "A String", # The number of hybrid inspection requests processed within this job.
- },
- "infoTypeStats": [ # Statistics of how many instances of each info type were found during
- # inspect job.
- { # Statistics regarding a specific InfoType.
- "count": "A String", # Number of findings for this infoType.
- "infoType": { # Type of information detected by the API. # The type of finding this stat is for.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- },
- ],
- "totalEstimatedBytes": "A String", # Estimate of the number of bytes to process.
- "processedBytes": "A String", # Total size in bytes that were processed.
- },
- },
}</pre>
</div>
@@ -1887,8 +1887,44 @@
The object takes the form of:
{ # Request message for CreateJobTrigger.
+ "locationId": "A String", # The geographic location to store the job trigger. Reserved for
+ # future extensions.
"jobTrigger": { # Contains a configuration to make dlp api calls on a repeating basis. # Required. The JobTrigger to create.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Detailed information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User-provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -1918,6 +1954,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal to or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the ones detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in the order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -2069,164 +2263,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -2251,16 +2287,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -2284,6 +2310,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -2315,174 +2351,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -2527,53 +2395,185 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
},
"triggerId": "A String", # The trigger id can contain uppercase and lowercase letters,
# numbers, and hyphens; that is, it must match the regular
# expression: `[a-zA-Z\\d-_]+`. The maximum length is 100
# characters. Can be empty to allow the system to generate one.
- "locationId": "A String", # The geographic location to store the job trigger. Reserved for
- # future extensions.
}
x__xgafv: string, V1 error format.
@@ -2586,6 +2586,40 @@
{ # Contains a configuration to make dlp api calls on a repeating basis.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Detailed information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -2615,6 +2649,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in the order in which they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -2766,164 +2958,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -2948,16 +2982,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -2981,6 +3005,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -3012,174 +3046,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -3224,46 +3090,180 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # Message representing a set of files in a Cloud Storage bucket. Regular expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
}</pre>
</div>
@@ -3313,6 +3313,40 @@
{ # Contains a configuration to make dlp api calls on a repeating basis.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Detailed information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -3342,6 +3376,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal to or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -3493,164 +3685,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -3675,16 +3709,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -3708,6 +3732,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -3739,174 +3773,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -3951,46 +3817,180 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
}</pre>
</div>
@@ -4058,6 +4058,40 @@
"jobTriggers": [ # List of triggeredJobs, up to page_size in ListJobTriggersRequest.
{ # Contains a configuration to make dlp api calls on a repeating basis.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Detailed information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -4087,6 +4121,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal to or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -4238,164 +4430,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -4420,16 +4454,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -4453,6 +4477,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -4484,174 +4518,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -4696,46 +4562,180 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
},
],
"nextPageToken": "A String", # If the next page is available then the next page token to be used
@@ -4769,9 +4769,42 @@
The object takes the form of:
{ # Request message for UpdateJobTrigger.
- "updateMask": "A String", # Mask to control which fields get updated.
"jobTrigger": { # Contains a configuration to make dlp api calls on a repeating basis. # New JobTrigger value.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Details information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -4801,6 +4834,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in the order in which they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -4952,164 +5143,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -5134,16 +5167,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -5167,6 +5190,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -5198,174 +5231,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -5410,47 +5275,182 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
},
+ "updateMask": "A String", # Mask to control which fields get updated.
}
x__xgafv: string, V1 error format.
@@ -5463,6 +5463,40 @@
{ # Contains a configuration to make dlp api calls on a repeating basis.
# See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
+ "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
+ "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
+ # triggeredJob is created, for example
+ # `projects/dlp-test-project/jobTriggers/53234423`.
+ "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
+ # errors may result in the JobTrigger automatically being paused.
+ # Will return the last 100 errors. Whenever the JobTrigger is modified
+ # this list will be cleared.
+ { # Detailed information about an error encountered during job execution or
+ # the results of an unsuccessful activation of the JobTrigger.
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
+ # different programming environments, including REST APIs and RPC APIs. It is
+ # used by [gRPC](https://github.com/grpc). Each `Status` message contains
+ # three pieces of data: error code, error message, and error details.
+ #
+ # You can find out more about this error model and how to work with it in the
+ # [API Design Guide](https://cloud.google.com/apis/design/errors).
+ "message": "A String", # A developer-facing error message, which should be in English. Any
+ # user-facing error message should be localized and sent in the
+ # google.rpc.Status.details field, or localized by the client.
+ "details": [ # A list of messages that carry the error details. There is a common set of
+ # message types for APIs to use.
+ {
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ ],
+ "code": 42, # The status code, which should be an enum value of google.rpc.Code.
+ },
+ "timestamps": [ # The times the error occurred.
+ "A String",
+ ],
+ },
+ ],
+ "description": "A String", # User provided description (max 256 chars)
"triggers": [ # A list of triggers which will be OR'ed together. Only one in the list
# needs to trigger for a job to be started. The list may contain only
# a single Schedule trigger and must have at least one object.
@@ -5492,6 +5526,164 @@
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
+ "minLikelihood": "A String", # Only returns findings equal to or above this threshold. The default is
+ # POSSIBLE.
+ # See https://cloud.google.com/dlp/docs/likelihood to learn more.
+ "contentOptions": [ # List of options defining data content to scan.
+ # If empty, text, images, and other content will be included.
+ "A String",
+ ],
+ "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
+ # InfoType values returned by ListInfoTypes or listed at
+ # https://cloud.google.com/dlp/docs/infotypes-reference.
+ #
+ # When no InfoTypes or CustomInfoTypes are specified in a request, the
+ # system may automatically choose what detectors to run. By default this may
+ # be all types, but may change over time as detectors are updated.
+ #
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
+ { # Type of information detected by the API.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ ],
+ "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
+ "customInfoTypes": [ # CustomInfoTypes provided by the user. See
+ # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
+ { # Custom information type provided by the user. Used to find domain-specific
+ # sensitive information configurable to the data in question.
+ "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
+ # support reversing.
+ # such as
+ # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
+ # These types of transformations are
+ # those that perform pseudonymization, thereby producing a "surrogate" as
+ # output. This should be used in conjunction with a field on the
+ # transformation such as `surrogate_info_type`. This CustomInfoType does
+ # not support the use of `detection_rules`.
+ },
+ "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
+ # altered by a detection rule if the finding meets the criteria specified by
+ # the rule. Defaults to `VERY_LIKELY` if not specified.
+ "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
+ # infoType, when the name matches one of existing infoTypes and that infoType
+ # is specified in `InspectContent.info_types` field. Specifying the latter
+ # adds findings to the one detected by the system. If built-in info type is
+ # not specified in `InspectContent.info_types` list then the name is treated
+ # as a custom info type.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
+ # Rules are applied in order that they are specified. Not supported for the
+ # `surrogate_type` CustomInfoType.
+ { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
+ # `CustomInfoType` to alter behavior under certain circumstances, depending
+ # on the specific details of the rule. Not supported for the `surrogate_type`
+ # custom infoType.
+ "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
+ # proximity of hotwords.
+ "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
+ # part of a detection rule.
+ "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
+ # levels. For example, if a finding would be `POSSIBLE` without the
+ # detection rule and `relative_likelihood` is 1, then it is upgraded to
+ # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
+ # Likelihood may never drop below `VERY_UNLIKELY` or exceed
+ # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
+ # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
+ # a final likelihood of `LIKELY`.
+ "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
+ },
+ "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
+ "groupIndexes": [ # The index of the submatch to extract as findings. When not
+ # specified, the entire match is returned. No more than 3 may be included.
+ 42,
+ ],
+ "pattern": "A String", # Pattern defining the regular expression. Its syntax
+ # (https://github.com/google/re2/wiki/Syntax) can be found under the
+ # google/re2 repository on GitHub.
+ },
+ "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
+ # The total length of the window cannot exceed 1000 characters. Note that
+ # the finding itself will be included in the window, so that hotwords may
+ # be used to match substrings of the finding itself. For example, the
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
+ # adjusted upwards if the area code is known to be the local area code of
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
+ # is the area code in question.
+ # rule.
+ "windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
+ },
+ },
+ },
+ ],
+ "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
+ # to be returned. It still can be used for rules matching.
+ "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
+ # be used to match sensitive information specific to the data, such as a list
+ # of employee IDs or job titles.
+ #
+ # Dictionary words are case-insensitive and all characters other than letters
+ # and digits in the unicode [Basic Multilingual
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
+ # will be replaced with whitespace when scanning for matches, so the
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
+ # surrounding any match must be of a different type than the adjacent
+ # characters within the word, so letters must be next to non-letters and
+ # digits next to non-digits. For example, the dictionary word "jen" will
+ # match the first three letters of the text "jen123" but will return no
+ # matches for "jennifer".
+ #
+ # Dictionary words containing a large number of characters that are not
+ # letters or digits may result in unexpected findings because such characters
+ # are treated as whitespace. The
+ # [limits](https://cloud.google.com/dlp/limits) page contains details about
+ # the size limits of dictionaries. For dictionaries that do not fit within
+ # these constraints, consider using `LargeCustomDictionaryConfig` in the
+ # `StoredInfoType` API.
+ "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
+ "words": [ # Words or phrases defining the dictionary. The dictionary must contain
+ # at least one phrase and every phrase must contain at least 2 characters
+ # that are letters or digits. [required]
+ "A String",
+ ],
+ },
+ "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
+ # is accepted.
+ "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
+ # Example: gs://[BUCKET_NAME]/dictionary.txt
+ },
+ },
+ "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
+ # `InspectDataSource`. Not currently supported in `InspectContent`.
+ "name": "A String", # Resource name of the requested `StoredInfoType`, for example
+ # `organizations/433245324/storedInfoTypes/432452342` or
+ # `projects/project-id/storedInfoTypes/432452342`.
+ "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
+ # inspection was created. Output-only field, populated by the system.
+ },
+ },
+ ],
"includeQuote": True or False, # When true, a contextual quote from the data that triggered a finding is
# included in the response; see Finding.quote.
"ruleSet": [ # Set of rules to apply to the findings for this InspectConfig.
@@ -5643,164 +5835,6 @@
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
},
- "contentOptions": [ # List of options defining data content to scan.
- # If empty, text, images, and other content will be included.
- "A String",
- ],
- "infoTypes": [ # Restricts what info_types to look for. The values must correspond to
- # InfoType values returned by ListInfoTypes or listed at
- # https://cloud.google.com/dlp/docs/infotypes-reference.
- #
- # When no InfoTypes or CustomInfoTypes are specified in a request, the
- # system may automatically choose what detectors to run. By default this may
- # be all types, but may change over time as detectors are updated.
- #
- # If you need precise control and predictability as to what detectors are
- # run you should specify specific InfoTypes listed in the reference,
- # otherwise a default list will be used, which may change over time.
- { # Type of information detected by the API.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- ],
- "minLikelihood": "A String", # Only returns findings equal or above this threshold. The default is
- # POSSIBLE.
- # See https://cloud.google.com/dlp/docs/likelihood to learn more.
- "excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "customInfoTypes": [ # CustomInfoTypes provided by the user. See
- # https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
- { # Custom information type provided by the user. Used to find domain-specific
- # sensitive information configurable to the data in question.
- "likelihood": "A String", # Likelihood to return for this CustomInfoType. This base value can be
- # altered by a detection rule if the finding meets the criteria specified by
- # the rule. Defaults to `VERY_LIKELY` if not specified.
- "infoType": { # Type of information detected by the API. # CustomInfoType can either be a new infoType, or an extension of built-in
- # infoType, when the name matches one of existing infoTypes and that infoType
- # is specified in `InspectContent.info_types` field. Specifying the latter
- # adds findings to the one detected by the system. If built-in info type is
- # not specified in `InspectContent.info_types` list then the name is treated
- # as a custom info type.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # `[a-zA-Z0-9_]{1,64}`.
- },
- "regex": { # Message defining a custom regular expression. # Regular expression based CustomInfoType.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "detectionRules": [ # Set of detection rules to apply to all findings of this CustomInfoType.
- # Rules are applied in order that they are specified. Not supported for the
- # `surrogate_type` CustomInfoType.
- { # Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
- # `CustomInfoType` to alter behavior under certain circumstances, depending
- # on the specific details of the rule. Not supported for the `surrogate_type`
- # custom infoType.
- "hotwordRule": { # The rule that adjusts the likelihood of findings within a certain # Hotword-based detection rule.
- # proximity of hotwords.
- "likelihoodAdjustment": { # Message for specifying an adjustment to the likelihood of a finding as # Likelihood adjustment to apply to all matching findings.
- # part of a detection rule.
- "relativeLikelihood": 42, # Increase or decrease the likelihood by the specified number of
- # levels. For example, if a finding would be `POSSIBLE` without the
- # detection rule and `relative_likelihood` is 1, then it is upgraded to
- # `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
- # Likelihood may never drop below `VERY_UNLIKELY` or exceed
- # `VERY_LIKELY`, so applying an adjustment of 1 followed by an
- # adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
- # a final likelihood of `LIKELY`.
- "fixedLikelihood": "A String", # Set the likelihood of a finding to a fixed value.
- },
- "hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
- "groupIndexes": [ # The index of the submatch to extract as findings. When not
- # specified, the entire match is returned. No more than 3 may be included.
- 42,
- ],
- "pattern": "A String", # Pattern defining the regular expression. Its syntax
- # (https://github.com/google/re2/wiki/Syntax) can be found under the
- # google/re2 repository on GitHub.
- },
- "proximity": { # Message for specifying a window around a finding to apply a detection # Proximity of the finding within which the entire hotword must reside.
- # The total length of the window cannot exceed 1000 characters. Note that
- # the finding itself will be included in the window, so that hotwords may
- # be used to match substrings of the finding itself. For example, the
- # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
- # adjusted upwards if the area code is known to be the local area code of
- # a company office using the hotword regex "\(xxx\)", where "xxx"
- # is the area code in question.
- # rule.
- "windowBefore": 42, # Number of characters before the finding to consider.
- "windowAfter": 42, # Number of characters after the finding to consider.
- },
- },
- },
- ],
- "exclusionType": "A String", # If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
- # to be returned. It still can be used for rules matching.
- "dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
- # be used to match sensitive information specific to the data, such as a list
- # of employee IDs or job titles.
- #
- # Dictionary words are case-insensitive and all characters other than letters
- # and digits in the unicode [Basic Multilingual
- # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
- # will be replaced with whitespace when scanning for matches, so the
- # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
- # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
- # surrounding any match must be of a different type than the adjacent
- # characters within the word, so letters must be next to non-letters and
- # digits next to non-digits. For example, the dictionary word "jen" will
- # match the first three letters of the text "jen123" but will return no
- # matches for "jennifer".
- #
- # Dictionary words containing a large number of characters that are not
- # letters or digits may result in unexpected findings because such characters
- # are treated as whitespace. The
- # [limits](https://cloud.google.com/dlp/limits) page contains details about
- # the size limits of dictionaries. For dictionaries that do not fit within
- # these constraints, consider using `LargeCustomDictionaryConfig` in the
- # `StoredInfoType` API.
- "wordList": { # Message defining a list of words or phrases to search for in the data. # List of words or phrases to search for.
- "words": [ # Words or phrases defining the dictionary. The dictionary must contain
- # at least one phrase and every phrase must contain at least 2 characters
- # that are letters or digits. [required]
- "A String",
- ],
- },
- "cloudStoragePath": { # Message representing a single file or path in Cloud Storage. # Newline-delimited file of words in Cloud Storage. Only a single file
- # is accepted.
- "path": "A String", # A url representing a file or path (no wildcards) in Cloud Storage.
- # Example: gs://[BUCKET_NAME]/dictionary.txt
- },
- },
- "storedType": { # A reference to a StoredInfoType to use with scanning. # Load an existing `StoredInfoType` resource for use in
- # `InspectDataSource`. Not currently supported in `InspectContent`.
- "name": "A String", # Resource name of the requested `StoredInfoType`, for example
- # `organizations/433245324/storedInfoTypes/432452342` or
- # `projects/project-id/storedInfoTypes/432452342`.
- "createTime": "A String", # Timestamp indicating when the version of the `StoredInfoType` used for
- # inspection was created. Output-only field, populated by the system.
- },
- "surrogateType": { # Message for detecting output from deidentification transformations # Message for detecting output from deidentification transformations that
- # support reversing.
- # such as
- # [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
- # These types of transformations are
- # those that perform pseudonymization, thereby producing a "surrogate" as
- # output. This should be used in conjunction with a field on the
- # transformation such as `surrogate_info_type`. This CustomInfoType does
- # not support the use of `detection_rules`.
- },
- },
- ],
},
"actions": [ # Actions to execute at the completion of the job.
{ # A task to execute on the completion of a job.
@@ -5825,16 +5859,6 @@
# specified.
# Compatible with: Inspect, Risk
"outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
- "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
- # used for Inspect and must be unspecified for Risk jobs. Columns are derived
- # from the `Finding` object. If appending to an existing table, any columns
- # from the predefined schema that are missing will be added. No columns in
- # the existing table will be deleted.
- #
- # If unspecified, then all available columns will be used for a new table or
- # an (existing) table with no schema, and no changes will be made to an
- # existing table that has a schema.
- # Only for use with external storage.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -5858,6 +5882,16 @@
"datasetId": "A String", # Dataset ID of the table.
"tableId": "A String", # Name of the table.
},
+ "outputSchema": "A String", # Schema used for writing the findings for Inspect jobs. This field is only
+ # used for Inspect and must be unspecified for Risk jobs. Columns are derived
+ # from the `Finding` object. If appending to an existing table, any columns
+ # from the predefined schema that are missing will be added. No columns in
+ # the existing table will be deleted.
+ #
+ # If unspecified, then all available columns will be used for a new table or
+ # an (existing) table with no schema, and no changes will be made to an
+ # existing table that has a schema.
+ # Only for use with external storage.
},
},
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
@@ -5889,174 +5923,6 @@
},
],
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- "projectId": "A String", # The ID of the project to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore and BigQuery.
- #
- # For BigQuery:
- # Required to filter out rows based on the given start and
- # end times. If not specified and the table was modified between the given
- # start and end times, the entire table will be scanned.
- # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
- # `TIMESTAMP`, or `DATETIME` BigQuery column.
- #
- # For Datastore.
- # Valid data types of the timestamp field are: `TIMESTAMP`.
- # Datastore entity will be scanned if the timestamp property does not
- # exist or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
- # bucket.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
- "fileTypes": [ # List of file type groups to include in the scan.
- # If empty, all files are scanned and available data format processors
- # are applied. In addition, the binary content of the selected files
- # is always scanned as well.
- # Images are scanned only as binary if the specified region
- # does not support image inspection and no file_types were specified.
- # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
- "A String",
- ],
- "sampleMethod": "A String",
- "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
- # number of bytes scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
- # than this value then the rest of the bytes are omitted. Only one
- # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "fileSet": { # Set of files to scan. # The set of one or more files to scan.
- "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. Regular # The regex-filtered set of files to scan. Exactly one of `url` or
- # `regex_file_set` must be set.
- # expressions are used to allow fine-grained control over which files in the
- # bucket to include.
- #
- # Included files are those that match at least one item in `include_regex` and
- # do not match any items in `exclude_regex`. Note that a file that matches
- # items from both lists will _not_ be included. For a match to occur, the
- # entire file path (i.e., everything in the url after the bucket name) must
- # match the regular expression.
- #
- # For example, given the input `{bucket_name: "mybucket", include_regex:
- # ["directory1/.*"], exclude_regex:
- # ["directory1/excluded.*"]}`:
- #
- # * `gs://mybucket/directory1/myfile` will be included
- # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
- # across `/`)
- # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
- # full path doesn't match any items in `include_regex`)
- # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
- # matches an item in `exclude_regex`)
- #
- # If `include_regex` is left empty, it will match all files by default
- # (this is equivalent to setting `include_regex: [".*"]`).
- #
- # Some other common use cases:
- #
- # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
- # files in `mybucket` except for .pdf files
- # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
- # include all files directly under `gs://mybucket/directory/`, without matching
- # across `/`
- "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
- # the bucket that match at least one of these regular expressions will be
- # excluded from the scan.
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
- "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
- # the bucket that match at least one of these regular expressions will be
- # included in the set of files, except for those that also match an item in
- # `exclude_regex`. Leaving this field empty will match all files by default
- # (this is equivalent to including `.*` in the list).
- #
- # Regular expressions use RE2
- # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
- # under the google/re2 repository on GitHub.
- "A String",
- ],
- },
- "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
- #
- # If the url ends in a trailing slash, the bucket or directory represented
- # by the url will be scanned non-recursively (content in sub-directories
- # will not be scanned). This means that `gs://mybucket/` is equivalent to
- # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
- # `gs://mybucket/directory/*`.
- #
- # Exactly one of `url` or `regex_file_set` must be set.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
- "sampleMethod": "A String",
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "datasetId": "A String", # Dataset ID of the table.
- "tableId": "A String", # Name of the table.
- },
- "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
- # `actions.saveFindings.outputConfig.table` is specified, the values of
- # columns specified here are available in the output table under
- # `location.content_locations.record_location.record_key.id_values`. Nested
- # fields such as `person.birthdate.year` are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- },
"hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
# Early access feature is in a pre-release state and might change or have
# limited support. For more information, see
@@ -6101,46 +5967,180 @@
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ },
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
+ # bucket.
+ "bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
+ # than this value then the rest of the bytes are omitted. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ "fileSet": { # Set of files to scan. # The set of one or more files to scan.
+ "regexFileSet": { # Message representing a set of files in a Cloud Storage bucket. # The regex-filtered set of files to scan. Exactly one of `url` or
+ # `regex_file_set` must be set.
+ # Regular expressions are used to allow fine-grained control over which files in the
+ # bucket to include.
+ #
+ # Included files are those that match at least one item in `include_regex` and
+ # do not match any items in `exclude_regex`. Note that a file that matches
+ # items from both lists will _not_ be included. For a match to occur, the
+ # entire file path (i.e., everything in the url after the bucket name) must
+ # match the regular expression.
+ #
+ # For example, given the input `{bucket_name: "mybucket", include_regex:
+ # ["directory1/.*"], exclude_regex:
+ # ["directory1/excluded.*"]}`:
+ #
+ # * `gs://mybucket/directory1/myfile` will be included
+ # * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
+ # across `/`)
+ # * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
+ # full path doesn't match any items in `include_regex`)
+ # * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
+ # matches an item in `exclude_regex`)
+ #
+ # If `include_regex` is left empty, it will match all files by default
+ # (this is equivalent to setting `include_regex: [".*"]`).
+ #
+ # Some other common use cases:
+ #
+ # * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
+ # files in `mybucket` except for .pdf files
+ # * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
+ # include all files directly under `gs://mybucket/directory/`, without matching
+ # across `/`
+ "bucketName": "A String", # The name of a Cloud Storage bucket. Required.
+ "includeRegex": [ # A list of regular expressions matching file paths to include. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # included in the set of files, except for those that also match an item in
+ # `exclude_regex`. Leaving this field empty will match all files by default
+ # (this is equivalent to including `.*` in the list).
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ "excludeRegex": [ # A list of regular expressions matching file paths to exclude. All files in
+ # the bucket that match at least one of these regular expressions will be
+ # excluded from the scan.
+ #
+ # Regular expressions use RE2
+ # [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
+ # under the google/re2 repository on GitHub.
+ "A String",
+ ],
+ },
+ "url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ #
+ # If the url ends in a trailing slash, the bucket or directory represented
+ # by the url will be scanned non-recursively (content in sub-directories
+ # will not be scanned). This means that `gs://mybucket/` is equivalent to
+ # `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
+ # `gs://mybucket/directory/*`.
+ #
+ # Exactly one of `url` or `regex_file_set` must be set.
+ },
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 mean no limit. Defaults to 0.
+ "fileTypes": [ # List of file type groups to include in the scan.
+ # If empty, all files are scanned and available data format processors
+ # are applied. In addition, the binary content of the selected files
+ # is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
+ "A String",
+ ],
+ "sampleMethod": "A String",
+ "bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
+ # number of bytes scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 mean no limit. Defaults to 0. Only one
+ # of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "tableReference": { # Message defining the location of a BigQuery table. # Complete BigQuery table reference.
+ # A table is uniquely identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "datasetId": "A String", # Dataset ID of the table.
+ "tableId": "A String", # Name of the table.
+ },
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 mean no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "sampleMethod": "A String",
+ },
},
"inspectTemplateName": "A String", # If provided, will be used as the default for all values in InspectConfig.
# `inspect_config` will be merged into the values persisted as part of the
# template.
},
"status": "A String", # Required. A status for this trigger.
- "lastRunTime": "A String", # Output only. The timestamp of the last time this trigger executed.
- "name": "A String", # Unique resource name for the triggeredJob, assigned by the service when the
- # triggeredJob is created, for example
- # `projects/dlp-test-project/jobTriggers/53234423`.
- "errors": [ # Output only. A stream of errors encountered when the trigger was activated. Repeated
- # errors may result in the JobTrigger automatically being paused.
- # Will return the last 100 errors. Whenever the JobTrigger is modified
- # this list will be cleared.
- { # Details information about an error encountered during job execution or
- # the results of an unsuccessful activation of the JobTrigger.
- "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
- # different programming environments, including REST APIs and RPC APIs. It is
- # used by [gRPC](https://github.com/grpc). Each `Status` message contains
- # three pieces of data: error code, error message, and error details.
- #
- # You can find out more about this error model and how to work with it in the
- # [API Design Guide](https://cloud.google.com/apis/design/errors).
- "code": 42, # The status code, which should be an enum value of google.rpc.Code.
- "message": "A String", # A developer-facing error message, which should be in English. Any
- # user-facing error message should be localized and sent in the
- # google.rpc.Status.details field, or localized by the client.
- "details": [ # A list of messages that carry the error details. There is a common set of
- # message types for APIs to use.
- {
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- ],
- },
- "timestamps": [ # The times the error occurred.
- "A String",
- ],
- },
- ],
- "description": "A String", # User provided description (max 256 chars)
}</pre>
</div>