chore: regens API reference docs (#889)
diff --git a/docs/dyn/dlp_v2.projects.dlpJobs.html b/docs/dyn/dlp_v2.projects.dlpJobs.html
index 5d07ae5..1b7cdc0 100644
--- a/docs/dyn/dlp_v2.projects.dlpJobs.html
+++ b/docs/dyn/dlp_v2.projects.dlpJobs.html
@@ -78,7 +78,7 @@
<code><a href="#cancel">cancel(name, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Starts asynchronous cancellation on a long-running DlpJob. The server</p>
<p class="toc_element">
- <code><a href="#create">create(parent, body, x__xgafv=None)</a></code></p>
+ <code><a href="#create">create(parent, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Creates a new job to inspect storage or calculate risk metrics.</p>
<p class="toc_element">
<code><a href="#delete">delete(name, x__xgafv=None)</a></code></p>
@@ -87,7 +87,7 @@
<code><a href="#get">get(name, x__xgafv=None)</a></code></p>
<p class="firstline">Gets the latest state of a long-running DlpJob.</p>
<p class="toc_element">
- <code><a href="#list">list(parent, orderBy=None, type=None, pageSize=None, pageToken=None, x__xgafv=None, filter=None)</a></code></p>
+ <code><a href="#list">list(parent, orderBy=None, pageSize=None, x__xgafv=None, pageToken=None, type=None, locationId=None, filter=None)</a></code></p>
<p class="firstline">Lists DlpJobs that match the specified filter in the request.</p>
<p class="toc_element">
<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
@@ -102,7 +102,7 @@
https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more.
Args:
- name: string, The name of the DlpJob resource to be cancelled. (required)
+ name: string, Required. The name of the DlpJob resource to be cancelled. (required)
body: object, The request body.
The object takes the form of:
@@ -130,7 +130,7 @@
</div>
<div class="method">
- <code class="details" id="create">create(parent, body, x__xgafv=None)</code>
+ <code class="details" id="create">create(parent, body=None, x__xgafv=None)</code>
<pre>Creates a new job to inspect storage or calculate risk metrics.
See https://cloud.google.com/dlp/docs/inspecting-storage and
https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more.
@@ -140,37 +140,36 @@
be all types, but may change over time as detectors are updated.
Args:
- parent: string, The parent resource name, for example projects/my-project-id. (required)
- body: object, The request body. (required)
+ parent: string, Required. The parent resource name, for example projects/my-project-id. (required)
+ body: object, The request body.
The object takes the form of:
{ # Request message for CreateDlpJobRequest. Used to initiate long running
# jobs such as calculating risk metrics or inspecting Google Cloud
# Storage.
- "riskJob": { # Configuration for a risk analysis job. See
+ "riskJob": { # Configuration for a risk analysis job. See # Set to choose what metric to calculate.
# https://cloud.google.com/dlp/docs/concepts-risk-analysis to learn more.
"privacyMetric": { # Privacy metric to compute for reidentification risk analysis. # Privacy metric to compute.
- "numericalStatsConfig": { # Compute numerical stats over an individual column, including
+ "numericalStatsConfig": { # Compute numerical stats over an individual column, including # Numerical stats
# min, max, and quantiles.
"field": { # General identifier of a data field in a storage service. # Field to compute numerical stats on. Supported types are
# integer, float, date, datetime, timestamp, time.
"name": "A String", # Name describing the field.
},
},
- "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what
+ "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what # k-map
# is called "journalist risk" in the literature, except the attack dataset is
# statistically modeled instead of being perfectly known. This can be done
# using publicly available data (like the US Census), or using a custom
# statistical model (indicated as one or several BigQuery tables), or by
# extrapolating from the distribution of values in the input dataset.
- # A column with a semantic tag attached.
"regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
+ # Set if no column is tagged with a region-specific InfoType (like
# US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two columns can have the
- # same tag. [required]
- {
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two columns can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
"name": "A String", # Name describing the field.
},
"customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
@@ -185,7 +184,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
# the distribution of values in the input data
@@ -210,34 +209,33 @@
# If a tuple is present in the data but not in the auxiliary table, the
# corresponding relative frequency is assumed to be zero (and thus, the
# tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
"datasetId": "A String", # Dataset ID of the table.
},
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # An auxiliary field.
+ },
+ ],
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
},
],
},
- "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk.
+ "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk. # l-diversity
"sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
"name": "A String", # Name describing the field.
},
@@ -249,94 +247,8 @@
},
],
},
- "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to
- # figure out that one given individual appears in a de-identified dataset.
- # Similarly to the k-map metric, we cannot compute δ-presence exactly without
- # knowing the attack dataset, so we use a statistical model instead.
- "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
- # US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two fields can have the
- # same tag. [required]
- { # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
- },
- "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
- # the distribution of values in the input data
- # empty messages in your APIs. A typical example is to use it as the request
- # or the response type of an API method. For instance:
- #
- # service Foo {
- # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
- # }
- #
- # The JSON representation for `Empty` is empty JSON object `{}`.
- },
- },
- ],
- "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
- # used to tag a quasi-identifiers field must appear in exactly one
- # field of one auxiliary table.
- { # An auxiliary table containing statistical information on the relative
- # frequency of different quasi-identifiers values. It has one or several
- # quasi-identifiers columns, and one column that indicates the relative
- # frequency of each quasi-identifier tuple.
- # If a tuple is present in the data but not in the auxiliary table, the
- # corresponding relative frequency is assumed to be zero (and thus, the
- # tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- ],
- },
- "categoricalStatsConfig": { # Compute numerical stats over an individual column, including
- # number of distinct values and value count distribution.
- "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
- # supported except for arrays and structs. However, it may be more
- # informative to use NumericalStats when the field type is supported,
- # depending on the data.
- "name": "A String", # Name describing the field.
- },
- },
- "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk.
- "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Optional message indicating that multiple rows might be associated to a
+ "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk. # K-anonymity
+ "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Message indicating that multiple rows might be associated to a
# single individual. If the same entity_id is associated to multiple
# quasi-identifier tuples over distinct rows, we consider the entire
# collection of tuples as the composite quasi-identifier. This collection
@@ -364,16 +276,93 @@
},
],
},
- },
- "sourceTable": { # Message defining the location of a BigQuery table. A table is uniquely # Input dataset to compute metrics over.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
+ "categoricalStatsConfig": { # Compute categorical stats over an individual column, including # Categorical stats
+ # number of distinct values and value count distribution.
+ "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
+ # supported except for arrays and structs. However, it may be more
+ # informative to use NumericalStats when the field type is supported,
+ # depending on the data.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
+ # figure out that one given individual appears in a de-identified dataset.
+ # Similarly to the k-map metric, we cannot compute δ-presence exactly without
+ # knowing the attack dataset, so we use a statistical model instead.
+ "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
+ # Set if no column is tagged with a region-specific InfoType (like
+ # US_ZIP_5) or a region code.
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
+ # the distribution of values in the input data
+ # empty messages in your APIs. A typical example is to use it as the request
+ # or the response type of an API method. For instance:
+ #
+ # service Foo {
+ # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+ # }
+ #
+ # The JSON representation for `Empty` is empty JSON object `{}`.
+ },
+ },
+ ],
+ "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
+ # used to tag a quasi-identifiers field must appear in exactly one
+ # field of one auxiliary table.
+ { # An auxiliary table containing statistical information on the relative
+ # frequency of different quasi-identifiers values. It has one or several
+ # quasi-identifiers columns, and one column that indicates the relative
+ # frequency of each quasi-identifier tuple.
+ # If a tuple is present in the data but not in the auxiliary table, the
+ # corresponding relative frequency is assumed to be zero (and thus, the
+ # tuple is highly reidentifiable).
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ },
+ ],
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ ],
+ },
},
"actions": [ # Actions to execute at the completion of the job. Are executed in the order
# provided.
@@ -383,7 +372,7 @@
# OutputStorageConfig. Only a single instance of this action can be
# specified.
# Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output.
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -400,8 +389,8 @@
# quasi-identifiers, cannot store their results in the same table.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -416,9 +405,10 @@
# If unspecified, then all available columns will be used for a new table or
# an (existing) table with no schema, and no changes will be made to an
# existing table that has a schema.
+ # Only for use with external storage.
},
},
- "jobNotificationEmails": { # Enable email notification to project owners and editors on jobs's # Enable email notification to project owners and editors on job's
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
# completion/failure.
# completion/failure.
},
@@ -433,6 +423,22 @@
# Only a single instance of this action can be specified.
# Compatible with: Inspect
},
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver for each infoType requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Data Catalog.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
# message contains a single field, `DlpJobName`, which is equal to the
# finished job's
@@ -445,84 +451,20 @@
},
},
],
+ "sourceTable": { # Message defining the location of a BigQuery table. A table is uniquely # Input dataset to compute metrics over.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
},
- "jobId": "A String", # The job id can contain uppercase and lowercase letters,
- # numbers, and hyphens; that is, it must match the regular
- # expression: `[a-zA-Z\\d-_]+`. The maximum length is 100
- # characters. Can be empty to allow the system to generate one.
- "inspectJob": {
+ "inspectJob": { # Controls what and how to inspect for findings. # Set to control what and how to inspect.
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options specification.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "projectId": "A String", # The ID of the project to which the entities belong.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options specification.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "sampleMethod": "A String",
- "identifyingFields": [ # References to fields uniquely identifying rows within the table.
- # Nested fields in the format, like `person.birthdate.year`, are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore or BigQuery.
- # If not specified for BigQuery, table last modification timestamp
- # is checked against given time span.
- # The valid data types of the timestamp field are:
- # for BigQuery - timestamp, date, datetime;
- # for Datastore - timestamp.
- # Datastore entity will be scanned if the timestamp property does not exist
- # or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options specification.
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
# bucket.
"bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
# than this value then the rest of the bytes are omitted. Only one
@@ -530,7 +472,7 @@
"sampleMethod": "A String",
"fileSet": { # Set of files to scan. # The set of one or more files to scan.
"url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
#
# If the url ends in a trailing slash, the bucket or directory represented
# by the url will be scanned non-recursively (content in sub-directories
@@ -595,26 +537,151 @@
],
},
},
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
# number of bytes scanned is rounded down. Must be between 0 and 100,
# inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
# of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"fileTypes": [ # List of file type groups to include in the scan.
# If empty, all files are scanned and available data format processors
# are applied. In addition, the binary content of the selected files
# is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "sampleMethod": "A String",
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 means no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ },
+ "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ # of Google Cloud Platform.
+ "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
+ # meaningful such as the columns that are primary keys.
+ "identifyingFields": [ # The columns that are the primary keys for table objects included in
+ # ContentItem. A copy of this cell's value will be stored alongside
+ # each finding so that the finding can be traced to the specific row it came
+ # from. No more than 3 may be provided.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ },
+ "labels": { # To organize findings, these labels will be added to each finding.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # Label values must be between 0 and 63 characters long and must conform
+ # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
+ #
+ # No more than 10 labels can be associated with a given finding.
+ #
+ # Examples:
+ # * `"environment" : "production"`
+ # * `"pipeline" : "etl"`
+ "a_key": "A String",
+ },
+ "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
+ # 'finding_labels' map. Requests may contain others, but any request missing
+ # one of these will be rejected.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # No more than 10 keys can be required.
+ "A String",
+ ],
+ "description": "A String", # A short description of where the data is coming from. Will be stored once
+ # in the job. 256 max length.
+ },
},
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -629,13 +696,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -675,7 +742,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -739,8 +806,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -794,8 +861,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -844,7 +911,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -894,7 +961,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -911,16 +978,15 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -934,7 +1000,7 @@
# OutputStorageConfig. Only a single instance of this action can be
# specified.
# Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output.
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -951,8 +1017,8 @@
# quasi-identifiers, cannot store their results in the same table.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -967,9 +1033,10 @@
# If unspecified, then all available columns will be used for a new table or
# an (existing) table with no schema, and no changes will be made to an
# existing table that has a schema.
+ # Only for use with external storage.
},
},
- "jobNotificationEmails": { # Enable email notification to project owners and editors on jobs's # Enable email notification to project owners and editors on job's
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
# completion/failure.
# completion/failure.
},
@@ -984,6 +1051,22 @@
# Only a single instance of this action can be specified.
# Compatible with: Inspect
},
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver on each infotype requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Data Catalog.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
# message contains a single field, `DlpJobName`, which is equal to the
# finished job's
@@ -997,6 +1080,12 @@
},
],
},
+ "locationId": "A String", # The geographic location to store and process the job. Reserved for
+ # future extensions.
+ "jobId": "A String", # The job id can contain uppercase and lowercase letters,
+ # numbers, hyphens, and underscores; that is, it must match the regular
+ # expression: `[a-zA-Z\\d-_]+`. The maximum length is 100
+ # characters. Can be empty to allow the system to generate one.
}
x__xgafv: string, V1 error format.
@@ -1011,11 +1100,10 @@
"errors": [ # A stream of errors encountered running the job.
{ # Details information about an error encountered during job execution or
# the results of an unsuccessful activation of the JobTrigger.
- # Output only field.
"timestamps": [ # The times the error occurred.
"A String",
],
- "details": { # The `Status` type defines a logical error model that is suitable for
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
# different programming environments, including REST APIs and RPC APIs. It is
# used by [gRPC](https://github.com/grpc). Each `Status` message contains
# three pieces of data: error code, error message, and error details.
@@ -1037,20 +1125,20 @@
],
"name": "A String", # The server-assigned name.
"inspectDetails": { # The results of an inspect DataSource job. # Results from inspecting a data source.
- "requestedOptions": { # The configuration used for this job.
+ "requestedOptions": { # Snapshot of the inspection configuration. # The configuration used for this job.
"snapshotInspectTemplate": { # The inspectTemplate contains a configuration (set of types of sensitive data # If run with an InspectTemplate, a snapshot of its state at the time of
# this run.
# to be detected) to be used anywhere you otherwise would normally specify
# InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates
# to learn more.
- "updateTime": "A String", # The last update timestamp of a inspectTemplate, output only field.
+ "updateTime": "A String", # Output only. The last update timestamp of an inspectTemplate.
"displayName": "A String", # Display name (max 256 chars).
"description": "A String", # Short description (max 256 chars).
"inspectConfig": { # Configuration description of the scanning process. # The core content of the template. Configuration of the scanning process.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -1065,13 +1153,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -1111,7 +1199,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -1175,8 +1263,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -1230,8 +1318,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -1280,7 +1368,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -1330,7 +1418,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -1347,99 +1435,28 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
- "createTime": "A String", # The creation timestamp of a inspectTemplate, output only field.
- "name": "A String", # The template name. Output only.
+ "createTime": "A String", # Output only. The creation timestamp of an inspectTemplate.
+ "name": "A String", # Output only. The template name.
#
# The template will have one of the following formats:
# `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
- # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`
+ # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`;
},
- "jobConfig": {
+ "jobConfig": { # Controls what and how to inspect for findings. # Inspect config.
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options specification.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "projectId": "A String", # The ID of the project to which the entities belong.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options specification.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "sampleMethod": "A String",
- "identifyingFields": [ # References to fields uniquely identifying rows within the table.
- # Nested fields in the format, like `person.birthdate.year`, are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore or BigQuery.
- # If not specified for BigQuery, table last modification timestamp
- # is checked against given time span.
- # The valid data types of the timestamp field are:
- # for BigQuery - timestamp, date, datetime;
- # for Datastore - timestamp.
- # Datastore entity will be scanned if the timestamp property does not exist
- # or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options specification.
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
# bucket.
"bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
# than this value then the rest of the bytes are omitted. Only one
@@ -1447,7 +1464,7 @@
"sampleMethod": "A String",
"fileSet": { # Set of files to scan. # The set of one or more files to scan.
"url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
#
# If the url ends in a trailing slash, the bucket or directory represented
# by the url will be scanned non-recursively (content in sub-directories
@@ -1512,26 +1529,151 @@
],
},
},
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
# number of bytes scanned is rounded down. Must be between 0 and 100,
# inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
# of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"fileTypes": [ # List of file type groups to include in the scan.
# If empty, all files are scanned and available data format processors
# are applied. In addition, the binary content of the selected files
# is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "sampleMethod": "A String",
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 means no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ },
+ "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ # of Google Cloud Platform.
+ "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
+ # meaningful such as the columns that are primary keys.
+ "identifyingFields": [ # The columns that are the primary keys for table objects included in
+ # ContentItem. A copy of this cell's value will be stored alongside
+ # each finding so that the finding can be traced to the specific row it came
+ # from. No more than 3 may be provided.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ },
+ "labels": { # To organize findings, these labels will be added to each finding.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # Label values must be between 0 and 63 characters long and must conform
+ # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
+ #
+ # No more than 10 labels can be associated with a given finding.
+ #
+ # Examples:
+ # * `"environment" : "production"`
+ # * `"pipeline" : "etl"`
+ "a_key": "A String",
+ },
+ "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
+ # 'finding_labels' map. Request may contain others, but any missing one of
+ # these will be rejected.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # No more than 10 keys can be required.
+ "A String",
+ ],
+ "description": "A String", # A short description of where the data is coming from. Will be stored once
+ # in the job. 256 max length.
+ },
},
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -1546,13 +1688,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -1592,7 +1734,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -1656,8 +1798,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -1711,8 +1853,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -1761,7 +1903,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -1811,7 +1953,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -1828,16 +1970,15 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -1851,7 +1992,7 @@
# OutputStorageConfig. Only a single instance of this action can be
# specified.
# Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output.
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -1868,8 +2009,8 @@
# quasi-identifiers, cannot store their results in the same table.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -1884,9 +2025,10 @@
# If unspecified, then all available columns will be used for a new table or
# an (existing) table with no schema, and no changes will be made to an
# existing table that has a schema.
+ # Only for use with external storage.
},
},
- "jobNotificationEmails": { # Enable email notification to project owners and editors on jobs's # Enable email notification to project owners and editors on job's
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
# completion/failure.
# completion/failure.
},
@@ -1901,6 +2043,22 @@
# Only a single instance of this action can be specified.
# Compatible with: Inspect
},
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver on each infotype requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Data Catalog.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
# message contains a single field, `DlpJobName`, which is equal to the
# finished job's
@@ -1925,16 +2083,29 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
},
],
"totalEstimatedBytes": "A String", # Estimate of the number of bytes to process.
"processedBytes": "A String", # Total size in bytes that were processed.
+ "hybridStats": { # Statistics related to processing hybrid inspect requests. # Statistics related to the processing of hybrid inspect.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ "abortedCount": "A String", # The number of hybrid inspection requests aborted because the job ran
+ # out of quota or was ended before they could be processed.
+ "pendingCount": "A String", # The number of hybrid requests currently being processed. Only populated
+ # when called via method `getDlpJob`.
+ # A burst of traffic may cause hybrid inspect requests to be enqueued.
+ # Processing will take place as quickly as possible, but resource limitations
+ # may impact how long a request is enqueued for.
+ "processedCount": "A String", # The number of hybrid inspection requests processed within this job.
+ },
},
},
"riskDetails": { # Result of a risk analysis operation request. # Results from analyzing risk of a data source.
- "numericalStatsResult": { # Result of the numerical stats computation.
+ "numericalStatsResult": { # Result of the numerical stats computation. # Numerical stats result
"quantileValues": [ # List of 99 values that partition the set of field values into 100 equal
# sized buckets.
{ # Set of primitive values supported by the system.
@@ -1943,10 +2114,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -1956,7 +2127,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -1966,17 +2137,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"maxValue": { # Set of primitive values supported by the system. # Maximum value appearing in the column.
@@ -1985,10 +2156,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -1998,7 +2169,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2008,17 +2179,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
"minValue": { # Set of primitive values supported by the system. # Minimum value appearing in the column.
# Note that for the purposes of inspection or transformation, the number
@@ -2026,10 +2197,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2039,7 +2210,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2049,20 +2220,20 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
- "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an
+ "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an # K-map result
# estimation, not exact values.
"kMapEstimationHistogram": [ # The intervals [min_anonymity, max_anonymity] do not overlap. If a value
# doesn't correspond to any such interval, the associated frequency is
@@ -2091,10 +2262,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2104,7 +2275,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2114,17 +2285,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
},
@@ -2136,9 +2307,9 @@
},
],
},
- "kAnonymityResult": { # Result of the k-anonymity computation.
+ "kAnonymityResult": { # Result of the k-anonymity computation. # K-anonymity result
"equivalenceClassHistogramBuckets": [ # Histogram of k-anonymity equivalence classes.
- {
+ { # Histogram of k-anonymity equivalence classes.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value
@@ -2151,10 +2322,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2164,7 +2335,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2174,17 +2345,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"equivalenceClassSize": "A String", # Size of the equivalence class, for example number of rows with the
@@ -2198,9 +2369,9 @@
},
],
},
- "lDiversityResult": { # Result of the l-diversity computation.
+ "lDiversityResult": { # Result of the l-diversity computation. # L-diversity result
"sensitiveValueFrequencyHistogramBuckets": [ # Histogram of l-diversity equivalence class sensitive value frequencies.
- {
+ { # Histogram of l-diversity equivalence class sensitive value frequencies.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value.
@@ -2213,10 +2384,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2226,7 +2397,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2236,17 +2407,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"topSensitiveValues": [ # Estimated frequencies of top sensitive values.
@@ -2258,10 +2429,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2271,7 +2442,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2281,17 +2452,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -2308,27 +2479,26 @@
],
},
"requestedPrivacyMetric": { # Privacy metric to compute for reidentification risk analysis. # Privacy metric to compute.
- "numericalStatsConfig": { # Compute numerical stats over an individual column, including
+ "numericalStatsConfig": { # Compute numerical stats over an individual column, including # Numerical stats
# min, max, and quantiles.
"field": { # General identifier of a data field in a storage service. # Field to compute numerical stats on. Supported types are
# integer, float, date, datetime, timestamp, time.
"name": "A String", # Name describing the field.
},
},
- "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what
+ "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what # k-map
# is called "journalist risk" in the literature, except the attack dataset is
# statistically modeled instead of being perfectly known. This can be done
# using publicly available data (like the US Census), or using a custom
# statistical model (indicated as one or several BigQuery tables), or by
# extrapolating from the distribution of values in the input dataset.
- # A column with a semantic tag attached.
"regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
+ # Set if no column is tagged with a region-specific InfoType (like
# US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two columns can have the
- # same tag. [required]
- {
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two columns can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
"name": "A String", # Name describing the field.
},
"customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
@@ -2343,7 +2513,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
# the distribution of values in the input data
@@ -2368,34 +2538,33 @@
# If a tuple is present in the data but not in the auxiliary table, the
# corresponding relative frequency is assumed to be zero (and thus, the
# tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
"datasetId": "A String", # Dataset ID of the table.
},
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # An auxiliary field.
+ },
+ ],
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
},
],
},
- "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk.
+ "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk. # l-diversity
"sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
"name": "A String", # Name describing the field.
},
@@ -2407,94 +2576,8 @@
},
],
},
- "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to
- # figure out that one given individual appears in a de-identified dataset.
- # Similarly to the k-map metric, we cannot compute δ-presence exactly without
- # knowing the attack dataset, so we use a statistical model instead.
- "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
- # US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two fields can have the
- # same tag. [required]
- { # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
- },
- "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
- # the distribution of values in the input data
- # empty messages in your APIs. A typical example is to use it as the request
- # or the response type of an API method. For instance:
- #
- # service Foo {
- # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
- # }
- #
- # The JSON representation for `Empty` is empty JSON object `{}`.
- },
- },
- ],
- "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
- # used to tag a quasi-identifiers field must appear in exactly one
- # field of one auxiliary table.
- { # An auxiliary table containing statistical information on the relative
- # frequency of different quasi-identifiers values. It has one or several
- # quasi-identifiers columns, and one column that indicates the relative
- # frequency of each quasi-identifier tuple.
- # If a tuple is present in the data but not in the auxiliary table, the
- # corresponding relative frequency is assumed to be zero (and thus, the
- # tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- ],
- },
- "categoricalStatsConfig": { # Compute numerical stats over an individual column, including
- # number of distinct values and value count distribution.
- "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
- # supported except for arrays and structs. However, it may be more
- # informative to use NumericalStats when the field type is supported,
- # depending on the data.
- "name": "A String", # Name describing the field.
- },
- },
- "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk.
- "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Optional message indicating that multiple rows might be associated to a
+ "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk. # K-anonymity
+ "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Message indicating that multiple rows might be associated to a
# single individual. If the same entity_id is associated to multiple
# quasi-identifier tuples over distinct rows, we consider the entire
# collection of tuples as the composite quasi-identifier. This collection
@@ -2522,10 +2605,97 @@
},
],
},
+ "categoricalStatsConfig": { # Compute numerical stats over an individual column, including # Categorical stats
+ # number of distinct values and value count distribution.
+ "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
+ # supported except for arrays and structs. However, it may be more
+ # informative to use NumericalStats when the field type is supported,
+ # depending on the data.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
+ # figure out that one given individual appears in a de-identified dataset.
+ # Similarly to the k-map metric, we cannot compute δ-presence exactly without
+ # knowing the attack dataset, so we use a statistical model instead.
+ "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
+ # Set if no column is tagged with a region-specific InfoType (like
+ # US_ZIP_5) or a region code.
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
+ # the distribution of values in the input data
+ # empty messages in your APIs. A typical example is to use it as the request
+ # or the response type of an API method. For instance:
+ #
+ # service Foo {
+ # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+ # }
+ #
+ # The JSON representation for `Empty` is empty JSON object `{}`.
+ },
+ },
+ ],
+ "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
+ # used to tag a quasi-identifiers field must appear in exactly one
+ # field of one auxiliary table.
+ { # An auxiliary table containing statistical information on the relative
+ # frequency of different quasi-identifiers values. It has one or several
+ # quasi-identifiers columns, and one column that indicates the relative
+ # frequency of each quasi-identifier tuple.
+ # If a tuple is present in the data but not in the auxiliary table, the
+ # corresponding relative frequency is assumed to be zero (and thus, the
+ # tuple is highly reidentifiable).
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ },
+ ],
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ ],
+ },
},
- "categoricalStatsResult": { # Result of the categorical stats computation.
+ "categoricalStatsResult": { # Result of the categorical stats computation. # Categorical stats result
"valueFrequencyHistogramBuckets": [ # Histogram of value frequencies in the column.
- {
+ { # Histogram of value frequencies in the column.
"bucketValues": [ # Sample of value frequencies in this bucket. The total number of
# values returned per bucket is capped at 20.
{ # A value of a field, including its frequency.
@@ -2536,10 +2706,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2549,7 +2719,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2559,17 +2729,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -2580,7 +2750,7 @@
},
],
},
- "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an
+ "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an # Delta-presence result
# estimation, not exact values.
"deltaPresenceEstimationHistogram": [ # The intervals [min_probability, max_probability) do not overlap. If a
# value doesn't correspond to any such interval, the associated frequency
@@ -2609,10 +2779,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -2622,7 +2792,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -2632,17 +2802,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"estimatedProbability": 3.14, # The estimated probability that a given individual sharing these
@@ -2665,8 +2835,8 @@
"requestedSourceTable": { # Message defining the location of a BigQuery table. A table is uniquely # Input dataset to compute metrics over.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -2692,7 +2862,7 @@
https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more.
Args:
- name: string, The name of the DlpJob resource to be deleted. (required)
+ name: string, Required. The name of the DlpJob resource to be deleted. (required)
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
@@ -2720,7 +2890,7 @@
https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more.
Args:
- name: string, The name of the DlpJob resource. (required)
+ name: string, Required. The name of the DlpJob resource. (required)
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
@@ -2733,11 +2903,10 @@
"errors": [ # A stream of errors encountered running the job.
{ # Details information about an error encountered during job execution or
# the results of an unsuccessful activation of the JobTrigger.
- # Output only field.
"timestamps": [ # The times the error occurred.
"A String",
],
- "details": { # The `Status` type defines a logical error model that is suitable for
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
# different programming environments, including REST APIs and RPC APIs. It is
# used by [gRPC](https://github.com/grpc). Each `Status` message contains
# three pieces of data: error code, error message, and error details.
@@ -2759,20 +2928,20 @@
],
"name": "A String", # The server-assigned name.
"inspectDetails": { # The results of an inspect DataSource job. # Results from inspecting a data source.
- "requestedOptions": { # The configuration used for this job.
+ "requestedOptions": { # Snapshot of the inspection configuration. # The configuration used for this job.
"snapshotInspectTemplate": { # The inspectTemplate contains a configuration (set of types of sensitive data # If run with an InspectTemplate, a snapshot of its state at the time of
# this run.
# to be detected) to be used anywhere you otherwise would normally specify
# InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates
# to learn more.
- "updateTime": "A String", # The last update timestamp of a inspectTemplate, output only field.
+ "updateTime": "A String", # Output only. The last update timestamp of an inspectTemplate.
"displayName": "A String", # Display name (max 256 chars).
"description": "A String", # Short description (max 256 chars).
"inspectConfig": { # Configuration description of the scanning process. # The core content of the template. Configuration of the scanning process.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -2787,13 +2956,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -2833,7 +3002,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -2897,8 +3066,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -2952,8 +3121,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -3002,7 +3171,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -3052,7 +3221,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -3069,99 +3238,28 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
- "createTime": "A String", # The creation timestamp of a inspectTemplate, output only field.
- "name": "A String", # The template name. Output only.
+ "createTime": "A String", # Output only. The creation timestamp of an inspectTemplate.
+ "name": "A String", # Output only. The template name.
#
# The template will have one of the following formats:
# `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
- # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`
+ # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`;
},
- "jobConfig": {
+ "jobConfig": { # Controls what and how to inspect for findings. # Inspect config.
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options specification.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "projectId": "A String", # The ID of the project to which the entities belong.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options specification.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "sampleMethod": "A String",
- "identifyingFields": [ # References to fields uniquely identifying rows within the table.
- # Nested fields in the format, like `person.birthdate.year`, are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore or BigQuery.
- # If not specified for BigQuery, table last modification timestamp
- # is checked against given time span.
- # The valid data types of the timestamp field are:
- # for BigQuery - timestamp, date, datetime;
- # for Datastore - timestamp.
- # Datastore entity will be scanned if the timestamp property does not exist
- # or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options specification.
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
# bucket.
"bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
# than this value then the rest of the bytes are omitted. Only one
@@ -3169,7 +3267,7 @@
"sampleMethod": "A String",
"fileSet": { # Set of files to scan. # The set of one or more files to scan.
"url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
#
# If the url ends in a trailing slash, the bucket or directory represented
# by the url will be scanned non-recursively (content in sub-directories
@@ -3234,26 +3332,151 @@
],
},
},
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
# number of bytes scanned is rounded down. Must be between 0 and 100,
# inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
# of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"fileTypes": [ # List of file type groups to include in the scan.
# If empty, all files are scanned and available data format processors
# are applied. In addition, the binary content of the selected files
# is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "sampleMethod": "A String",
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 means no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ },
+ "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ # of Google Cloud Platform.
+ "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
+ # meaningful such as the columns that are primary keys.
+ "identifyingFields": [ # The columns that are the primary keys for table objects included in
+ # ContentItem. A copy of this cell's value will be stored alongside
+ # each finding so that the finding can be traced to the specific row it came
+ # from. No more than 3 may be provided.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ },
+ "labels": { # To organize findings, these labels will be added to each finding.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # Label values must be between 0 and 63 characters long and must conform
+ # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
+ #
+ # No more than 10 labels can be associated with a given finding.
+ #
+ # Examples:
+ # * `"environment" : "production"`
+ # * `"pipeline" : "etl"`
+ "a_key": "A String",
+ },
+ "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
+ # 'finding_labels' map. Request may contain others, but any missing one of
+ # these will be rejected.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # No more than 10 keys can be required.
+ "A String",
+ ],
+ "description": "A String", # A short description of where the data is coming from. Will be stored once
+ # in the job. 256 max length.
+ },
},
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -3268,13 +3491,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -3314,7 +3537,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -3378,8 +3601,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -3433,8 +3656,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -3483,7 +3706,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -3533,7 +3756,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -3550,16 +3773,15 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -3573,7 +3795,7 @@
# OutputStorageConfig. Only a single instance of this action can be
# specified.
# Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output.
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -3590,8 +3812,8 @@
# quasi-identifiers, cannot store their results in the same table.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -3606,9 +3828,10 @@
# If unspecified, then all available columns will be used for a new table or
# an (existing) table with no schema, and no changes will be made to an
# existing table that has a schema.
+ # Only for use with external storage.
},
},
- "jobNotificationEmails": { # Enable email notification to project owners and editors on jobs's # Enable email notification to project owners and editors on job's
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
# completion/failure.
# completion/failure.
},
@@ -3623,6 +3846,22 @@
# Only a single instance of this action can be specified.
# Compatible with: Inspect
},
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver on each infotype requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Datahub.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
# message contains a single field, `DlpJobName`, which is equal to the
# finished job's
@@ -3647,16 +3886,29 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
},
],
"totalEstimatedBytes": "A String", # Estimate of the number of bytes to process.
"processedBytes": "A String", # Total size in bytes that were processed.
+ "hybridStats": { # Statistics related to processing hybrid inspect requests. # Statistics related to the processing of hybrid inspect.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ "abortedCount": "A String", # The number of hybrid inspection requests aborted because the job ran
+ # out of quota or was ended before they could be processed.
+ "pendingCount": "A String", # The number of hybrid requests currently being processed. Only populated
+ # when called via method `getDlpJob`.
+ # A burst of traffic may cause hybrid inspect requests to be enqueued.
+ # Processing will take place as quickly as possible, but resource limitations
+ # may impact how long a request is enqueued for.
+ "processedCount": "A String", # The number of hybrid inspection requests processed within this job.
+ },
},
},
"riskDetails": { # Result of a risk analysis operation request. # Results from analyzing risk of a data source.
- "numericalStatsResult": { # Result of the numerical stats computation.
+ "numericalStatsResult": { # Result of the numerical stats computation. # Numerical stats result
"quantileValues": [ # List of 99 values that partition the set of field values into 100 equal
# sized buckets.
{ # Set of primitive values supported by the system.
@@ -3665,10 +3917,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3678,7 +3930,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3688,17 +3940,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"maxValue": { # Set of primitive values supported by the system. # Maximum value appearing in the column.
@@ -3707,10 +3959,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3720,7 +3972,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3730,17 +3982,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
"minValue": { # Set of primitive values supported by the system. # Minimum value appearing in the column.
# Note that for the purposes of inspection or transformation, the number
@@ -3748,10 +4000,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3761,7 +4013,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3771,20 +4023,20 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
- "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an
+ "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an # K-map result
# estimation, not exact values.
"kMapEstimationHistogram": [ # The intervals [min_anonymity, max_anonymity] do not overlap. If a value
# doesn't correspond to any such interval, the associated frequency is
@@ -3813,10 +4065,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3826,7 +4078,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3836,17 +4088,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
},
@@ -3858,9 +4110,9 @@
},
],
},
- "kAnonymityResult": { # Result of the k-anonymity computation.
+ "kAnonymityResult": { # Result of the k-anonymity computation. # K-anonymity result
"equivalenceClassHistogramBuckets": [ # Histogram of k-anonymity equivalence classes.
- {
+ { # Histogram of k-anonymity equivalence classes.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value
@@ -3873,10 +4125,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3886,7 +4138,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3896,17 +4148,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"equivalenceClassSize": "A String", # Size of the equivalence class, for example number of rows with the
@@ -3920,9 +4172,9 @@
},
],
},
- "lDiversityResult": { # Result of the l-diversity computation.
+ "lDiversityResult": { # Result of the l-diversity computation. # L-diversity result
"sensitiveValueFrequencyHistogramBuckets": [ # Histogram of l-diversity equivalence class sensitive value frequencies.
- {
+ { # Histogram of l-diversity equivalence class sensitive value frequencies.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value.
@@ -3935,10 +4187,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3948,7 +4200,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -3958,17 +4210,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"topSensitiveValues": [ # Estimated frequencies of top sensitive values.
@@ -3980,10 +4232,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -3993,7 +4245,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -4003,17 +4255,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -4030,27 +4282,26 @@
],
},
"requestedPrivacyMetric": { # Privacy metric to compute for reidentification risk analysis. # Privacy metric to compute.
- "numericalStatsConfig": { # Compute numerical stats over an individual column, including
+ "numericalStatsConfig": { # Compute numerical stats over an individual column, including # Numerical stats
# min, max, and quantiles.
"field": { # General identifier of a data field in a storage service. # Field to compute numerical stats on. Supported types are
# integer, float, date, datetime, timestamp, time.
"name": "A String", # Name describing the field.
},
},
- "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what
+ "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what # k-map
# is called "journalist risk" in the literature, except the attack dataset is
# statistically modeled instead of being perfectly known. This can be done
# using publicly available data (like the US Census), or using a custom
# statistical model (indicated as one or several BigQuery tables), or by
# extrapolating from the distribution of values in the input dataset.
- # A column with a semantic tag attached.
"regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
+ # Set if no column is tagged with a region-specific InfoType (like
# US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two columns can have the
- # same tag. [required]
- {
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two columns can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
"name": "A String", # Name describing the field.
},
"customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
@@ -4065,7 +4316,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
# the distribution of values in the input data
@@ -4090,34 +4341,33 @@
# If a tuple is present in the data but not in the auxiliary table, the
# corresponding relative frequency is assumed to be zero (and thus, the
# tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
"datasetId": "A String", # Dataset ID of the table.
},
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # An auxiliary field.
+ },
+ ],
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
},
],
},
- "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk.
+ "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk. # l-diversity
"sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
"name": "A String", # Name describing the field.
},
@@ -4129,94 +4379,8 @@
},
],
},
- "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to
- # figure out that one given individual appears in a de-identified dataset.
- # Similarly to the k-map metric, we cannot compute δ-presence exactly without
- # knowing the attack dataset, so we use a statistical model instead.
- "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
- # US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two fields can have the
- # same tag. [required]
- { # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
- },
- "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
- # the distribution of values in the input data
- # empty messages in your APIs. A typical example is to use it as the request
- # or the response type of an API method. For instance:
- #
- # service Foo {
- # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
- # }
- #
- # The JSON representation for `Empty` is empty JSON object `{}`.
- },
- },
- ],
- "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
- # used to tag a quasi-identifiers field must appear in exactly one
- # field of one auxiliary table.
- { # An auxiliary table containing statistical information on the relative
- # frequency of different quasi-identifiers values. It has one or several
- # quasi-identifiers columns, and one column that indicates the relative
- # frequency of each quasi-identifier tuple.
- # If a tuple is present in the data but not in the auxiliary table, the
- # corresponding relative frequency is assumed to be zero (and thus, the
- # tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- ],
- },
- "categoricalStatsConfig": { # Compute numerical stats over an individual column, including
- # number of distinct values and value count distribution.
- "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
- # supported except for arrays and structs. However, it may be more
- # informative to use NumericalStats when the field type is supported,
- # depending on the data.
- "name": "A String", # Name describing the field.
- },
- },
- "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk.
- "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Optional message indicating that multiple rows might be associated to a
+ "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk. # K-anonymity
+ "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Message indicating that multiple rows might be associated to a
# single individual. If the same entity_id is associated to multiple
# quasi-identifier tuples over distinct rows, we consider the entire
# collection of tuples as the composite quasi-identifier. This collection
@@ -4244,10 +4408,97 @@
},
],
},
+ "categoricalStatsConfig": { # Compute numerical stats over an individual column, including # Categorical stats
+ # number of distinct values and value count distribution.
+ "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
+ # supported except for arrays and structs. However, it may be more
+ # informative to use NumericalStats when the field type is supported,
+ # depending on the data.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
+ # figure out that one given individual appears in a de-identified dataset.
+ # Similarly to the k-map metric, we cannot compute δ-presence exactly without
+ # knowing the attack dataset, so we use a statistical model instead.
+ "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
+ # Set if no column is tagged with a region-specific InfoType (like
+ # US_ZIP_5) or a region code.
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
+ # the distribution of values in the input data
+ # empty messages in your APIs. A typical example is to use it as the request
+ # or the response type of an API method. For instance:
+ #
+ # service Foo {
+ # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+ # }
+ #
+ # The JSON representation for `Empty` is empty JSON object `{}`.
+ },
+ },
+ ],
+ "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
+ # used to tag a quasi-identifiers field must appear in exactly one
+ # field of one auxiliary table.
+ { # An auxiliary table containing statistical information on the relative
+ # frequency of different quasi-identifiers values. It has one or several
+ # quasi-identifiers columns, and one column that indicates the relative
+ # frequency of each quasi-identifier tuple.
+ # If a tuple is present in the data but not in the auxiliary table, the
+ # corresponding relative frequency is assumed to be zero (and thus, the
+ # tuple is highly reidentifiable).
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ },
+ ],
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ ],
+ },
},
- "categoricalStatsResult": { # Result of the categorical stats computation.
+ "categoricalStatsResult": { # Result of the categorical stats computation. # Categorical stats result
"valueFrequencyHistogramBuckets": [ # Histogram of value frequencies in the column.
- {
+ { # Histogram of value frequencies in the column.
"bucketValues": [ # Sample of value frequencies in this bucket. The total number of
# values returned per bucket is capped at 20.
{ # A value of a field, including its frequency.
@@ -4258,10 +4509,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -4271,7 +4522,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -4281,17 +4532,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -4302,7 +4553,7 @@
},
],
},
- "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an
+ "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an # Delta-presence result
# estimation, not exact values.
"deltaPresenceEstimationHistogram": [ # The intervals [min_probability, max_probability) do not overlap. If a
# value doesn't correspond to any such interval, the associated frequency
@@ -4331,10 +4582,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -4344,7 +4595,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -4354,17 +4605,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"estimatedProbability": 3.14, # The estimated probability that a given individual sharing these
@@ -4387,8 +4638,8 @@
"requestedSourceTable": { # Message defining the location of a BigQuery table. A table is uniquely # Input dataset to compute metrics over.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -4406,14 +4657,14 @@
</div>
<div class="method">
- <code class="details" id="list">list(parent, orderBy=None, type=None, pageSize=None, pageToken=None, x__xgafv=None, filter=None)</code>
+ <code class="details" id="list">list(parent, orderBy=None, pageSize=None, x__xgafv=None, pageToken=None, type=None, locationId=None, filter=None)</code>
<pre>Lists DlpJobs that match the specified filter in the request.
See https://cloud.google.com/dlp/docs/inspecting-storage and
https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more.
Args:
- parent: string, The parent resource name, for example projects/my-project-id. (required)
- orderBy: string, Optional comma separated list of fields to order by,
+ parent: string, Required. The parent resource name, for example projects/my-project-id. (required)
+ orderBy: string, Comma separated list of fields to order by,
followed by `asc` or `desc` postfix. This list is case-insensitive,
default sorting order is ascending, redundant space characters are
insignificant.
@@ -4426,21 +4677,23 @@
- `end_time`: corresponds to time the job ended.
- `name`: corresponds to job's name.
- `state`: corresponds to `state`
- type: string, The type of job. Defaults to `DlpJobType.INSPECT`
pageSize: integer, The standard list page size.
- pageToken: string, The standard list page token.
x__xgafv: string, V1 error format.
Allowed values
1 - v1 error format
2 - v2 error format
- filter: string, Optional. Allows filtering.
+ pageToken: string, The standard list page token.
+ type: string, The type of job. Defaults to `DlpJobType.INSPECT`
+ locationId: string, The geographic location where jobs will be retrieved from.
+Use `-` for all locations. Reserved for future extensions.
+ filter: string, Allows filtering.
Supported syntax:
* Filter expressions are made up of one or more restrictions.
* Restrictions can be combined by `AND` or `OR` logical operators. A
sequence of restrictions implicitly uses `AND`.
-* A restriction has the form of `<field> <operator> <value>`.
+* A restriction has the form of `{field} {operator} {value}`.
* Supported fields/values for inspect jobs:
- `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED
- `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY
@@ -4458,7 +4711,7 @@
* inspected_storage = cloud_storage AND state = done
* inspected_storage = cloud_storage OR inspected_storage = bigquery
* inspected_storage = cloud_storage AND (state = done OR state = canceled)
-* end_time > \"2017-12-12T00:00:00+00:00\"
+* end_time > \"2017-12-12T00:00:00+00:00\"
The length of this field should be no more than 500 characters.
@@ -4472,11 +4725,10 @@
"errors": [ # A stream of errors encountered running the job.
{ # Details information about an error encountered during job execution or
# the results of an unsuccessful activation of the JobTrigger.
- # Output only field.
"timestamps": [ # The times the error occurred.
"A String",
],
- "details": { # The `Status` type defines a logical error model that is suitable for
+ "details": { # The `Status` type defines a logical error model that is suitable for # Detailed error codes and messages.
# different programming environments, including REST APIs and RPC APIs. It is
# used by [gRPC](https://github.com/grpc). Each `Status` message contains
# three pieces of data: error code, error message, and error details.
@@ -4498,20 +4750,20 @@
],
"name": "A String", # The server-assigned name.
"inspectDetails": { # The results of an inspect DataSource job. # Results from inspecting a data source.
- "requestedOptions": { # The configuration used for this job.
+ "requestedOptions": { # Snapshot of the inspection configuration. # The configuration used for this job.
"snapshotInspectTemplate": { # The inspectTemplate contains a configuration (set of types of sensitive data # If run with an InspectTemplate, a snapshot of its state at the time of
# this run.
# to be detected) to be used anywhere you otherwise would normally specify
# InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates
# to learn more.
- "updateTime": "A String", # The last update timestamp of a inspectTemplate, output only field.
+ "updateTime": "A String", # Output only. The last update timestamp of an inspectTemplate.
"displayName": "A String", # Display name (max 256 chars).
"description": "A String", # Short description (max 256 chars).
"inspectConfig": { # Configuration description of the scanning process. # The core content of the template. Configuration of the scanning process.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -4526,13 +4778,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -4572,7 +4824,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -4636,8 +4888,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -4691,8 +4943,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -4741,7 +4993,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -4791,7 +5043,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -4808,99 +5060,28 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
- "createTime": "A String", # The creation timestamp of a inspectTemplate, output only field.
- "name": "A String", # The template name. Output only.
+ "createTime": "A String", # Output only. The creation timestamp of an inspectTemplate.
+ "name": "A String", # Output only. The template name.
#
# The template will have one of the following formats:
# `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
- # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`
+ # `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`;
},
- "jobConfig": {
+ "jobConfig": { # Controls what and how to inspect for findings. # Inspect config.
"storageConfig": { # Shared message indicating Cloud storage type. # The data to scan.
- "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options specification.
- "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- # A partition ID identifies a grouping of entities. The grouping is always
- # by project and namespace, however the namespace ID may be empty.
- #
- # A partition ID contains several dimensions:
- # project ID and namespace ID.
- "projectId": "A String", # The ID of the project to which the entities belong.
- "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
- },
- "kind": { # A representation of a Datastore kind. # The kind to process.
- "name": "A String", # The name of the kind.
- },
- },
- "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options specification.
- "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
- # inspection of entire columns which you know have no findings.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
- # rest of the rows are omitted. If not set, or if set to 0, all rows will be
- # scanned. Only one of rows_limit and rows_limit_percent can be specified.
- # Cannot be used in conjunction with TimespanConfig.
- "sampleMethod": "A String",
- "identifyingFields": [ # References to fields uniquely identifying rows within the table.
- # Nested fields in the format, like `person.birthdate.year`, are allowed.
- { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- ],
- "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
- # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
- # 100 means no limit. Defaults to 0. Only one of rows_limit and
- # rows_limit_percent can be specified. Cannot be used in conjunction with
- # TimespanConfig.
- "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
- # Currently only supported when inspecting Google Cloud Storage and BigQuery.
- "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
- # Used for data sources like Datastore or BigQuery.
- # If not specified for BigQuery, table last modification timestamp
- # is checked against given time span.
- # The valid data types of the timestamp field are:
- # for BigQuery - timestamp, date, datetime;
- # for Datastore - timestamp.
- # Datastore entity will be scanned if the timestamp property does not exist
- # or its value is empty or invalid.
- "name": "A String", # Name describing the field.
- },
- "endTime": "A String", # Exclude files or rows newer than this value.
- # If set to zero, no upper time limit is applied.
- "startTime": "A String", # Exclude files or rows older than this value.
- "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
- # a valid start_time to avoid scanning files that have not been modified
- # since the last time the JobTrigger executed. This will be based on the
- # time of the execution of the last run of the JobTrigger.
- },
- "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options specification.
+ "cloudStorageOptions": { # Options defining a file or a set of files within a Google Cloud Storage # Google Cloud Storage options.
# bucket.
"bytesLimitPerFile": "A String", # Max number of bytes to scan from a file. If a scanned file's size is bigger
# than this value then the rest of the bytes are omitted. Only one
@@ -4908,7 +5089,7 @@
"sampleMethod": "A String",
"fileSet": { # Set of files to scan. # The set of one or more files to scan.
"url": "A String", # The Cloud Storage url of the file(s) to scan, in the format
- # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
+ # `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
#
# If the url ends in a trailing slash, the bucket or directory represented
# by the url will be scanned non-recursively (content in sub-directories
@@ -4973,26 +5154,151 @@
],
},
},
+ "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
+ # Number of files scanned is rounded down. Must be between 0 and 100,
+ # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"bytesLimitPerFilePercent": 42, # Max percentage of bytes to scan from a file. The rest are omitted. The
# number of bytes scanned is rounded down. Must be between 0 and 100,
# inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
# of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
- "filesLimitPercent": 42, # Limits the number of files to scan to this percentage of the input FileSet.
- # Number of files scanned is rounded down. Must be between 0 and 100,
- # inclusively. Both 0 and 100 means no limit. Defaults to 0.
"fileTypes": [ # List of file type groups to include in the scan.
# If empty, all files are scanned and available data format processors
# are applied. In addition, the binary content of the selected files
# is always scanned as well.
+ # Images are scanned only as binary if the specified region
+ # does not support image inspection and no file_types were specified.
+ # Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
"A String",
],
},
+ "datastoreOptions": { # Options defining a data set within Google Cloud Datastore. # Google Cloud Datastore options.
+ "partitionId": { # Datastore partition ID. # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ # A partition ID identifies a grouping of entities. The grouping is always
+ # by project and namespace, however the namespace ID may be empty.
+ #
+ # A partition ID contains several dimensions:
+ # project ID and namespace ID.
+ "projectId": "A String", # The ID of the project to which the entities belong.
+ "namespaceId": "A String", # If not empty, the ID of the namespace to which the entities belong.
+ },
+ "kind": { # A representation of a Datastore kind. # The kind to process.
+ "name": "A String", # The name of the kind.
+ },
+ },
+ "bigQueryOptions": { # Options defining BigQuery table and row identifiers. # BigQuery options.
+ "excludedFields": [ # References to fields excluded from scanning. This allows you to skip
+ # inspection of entire columns which you know have no findings.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimit": "A String", # Max number of rows to scan. If the table has more rows than this value, the
+ # rest of the rows are omitted. If not set, or if set to 0, all rows will be
+ # scanned. Only one of rows_limit and rows_limit_percent can be specified.
+ # Cannot be used in conjunction with TimespanConfig.
+ "sampleMethod": "A String",
+ "identifyingFields": [ # Table fields that may uniquely identify a row within the table. When
+ # `actions.saveFindings.outputConfig.table` is specified, the values of
+ # columns specified here are available in the output table under
+ # `location.content_locations.record_location.record_key.id_values`. Nested
+ # fields such as `person.birthdate.year` are allowed.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ "rowsLimitPercent": 42, # Max percentage of rows to scan. The rest are omitted. The number of rows
+ # scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
+ # 100 means no limit. Defaults to 0. Only one of rows_limit and
+ # rows_limit_percent can be specified. Cannot be used in conjunction with
+ # TimespanConfig.
+ "tableReference": { # Message defining the location of a BigQuery table. A table is uniquely # Complete BigQuery table reference.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ "timespanConfig": { # Configuration of the timespan of the items to include in scanning.
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
+ "timestampField": { # General identifier of a data field in a storage service. # Specification of the field containing the timestamp of scanned items.
+ # Used for data sources like Datastore and BigQuery.
+ #
+ # For BigQuery:
+ # Required to filter out rows based on the given start and
+ # end times. If not specified and the table was modified between the given
+ # start and end times, the entire table will be scanned.
+ # The valid data types of the timestamp field are: `INTEGER`, `DATE`,
+ # `TIMESTAMP`, or `DATETIME` BigQuery column.
+ #
+ # For Datastore:
+ # Valid data types of the timestamp field are: `TIMESTAMP`.
+ # Datastore entity will be scanned if the timestamp property does not
+ # exist or its value is empty or invalid.
+ "name": "A String", # Name describing the field.
+ },
+ "endTime": "A String", # Exclude files or rows newer than this value.
+ # If set to zero, no upper time limit is applied.
+ "startTime": "A String", # Exclude files or rows older than this value.
+ "enableAutoPopulationOfTimespanConfig": True or False, # When the job is started by a JobTrigger we will automatically figure out
+ # a valid start_time to avoid scanning files that have not been modified
+ # since the last time the JobTrigger executed. This will be based on the
+ # time of the execution of the last run of the JobTrigger.
+ },
+ "hybridOptions": { # Configuration to control jobs where the content being inspected is outside # Hybrid inspection options.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ # of Google Cloud Platform.
+ "tableOptions": { # Instructions regarding the table content being inspected. # If the container is a table, additional information to make findings
+ # meaningful such as the columns that are primary keys.
+ "identifyingFields": [ # The columns that are the primary keys for table objects included in
+ # ContentItem. A copy of this cell's value will be stored alongside
+ # each finding so that the finding can be traced to the specific row it came
+ # from. No more than 3 may be provided.
+ { # General identifier of a data field in a storage service.
+ "name": "A String", # Name describing the field.
+ },
+ ],
+ },
+ "labels": { # To organize findings, these labels will be added to each finding.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # Label values must be between 0 and 63 characters long and must conform
+ # to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
+ #
+ # No more than 10 labels can be associated with a given finding.
+ #
+ # Examples:
+ # * `"environment" : "production"`
+ # * `"pipeline" : "etl"`
+ "a_key": "A String",
+ },
+ "requiredFindingLabelKeys": [ # These are labels that each inspection request must include within their
+ # 'finding_labels' map. Request may contain others, but any missing one of
+ # these will be rejected.
+ #
+ # Label keys must be between 1 and 63 characters long and must conform
+ # to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
+ #
+ # No more than 10 keys can be required.
+ "A String",
+ ],
+ "description": "A String", # A short description of where the data is coming from. Will be stored once
+ # in the job. 256 max length.
+ },
},
"inspectConfig": { # Configuration description of the scanning process. # How and what to scan for.
# When used with redactContent only info_types and min_likelihood are currently
# used.
"excludeInfoTypes": True or False, # When true, excludes type information of the findings.
- "limits": {
+ "limits": { # Configuration to control the number of findings returned. # Configuration to control the number of findings returned.
"maxFindingsPerRequest": 42, # Max number of findings that will be returned per request/job.
# When set within `InspectContentRequest`, the maximum returned is 2000
# regardless if this is set higher.
@@ -5007,13 +5313,13 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"maxFindings": 42, # Max findings limit for the given infoType.
},
],
"maxFindingsPerItem": 42, # Max number of findings that will be returned for each item scanned.
- # When set within `InspectDataSourceRequest`,
+ # When set within `InspectJobConfig`,
# the maximum returned is 2000 regardless if this is set higher.
# When set within `InspectContentRequest`, this field is ignored.
},
@@ -5053,7 +5359,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"dictionary": { # Custom information type based on a dictionary of words or phrases. This can # A list of phrases to detect as a CustomInfoType.
# be used to match sensitive information specific to the data, such as a list
@@ -5117,8 +5423,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -5172,8 +5478,8 @@
# a company office using the hotword regex "\(xxx\)", where "xxx"
# is the area code in question.
# rule.
- "windowAfter": 42, # Number of characters after the finding to consider.
"windowBefore": 42, # Number of characters before the finding to consider.
+ "windowAfter": 42, # Number of characters after the finding to consider.
},
"hotwordRegex": { # Message defining a custom regular expression. # Regular expression pattern defining what qualifies as a hotword.
"pattern": "A String", # Pattern defining the regular expression. Its syntax
@@ -5222,7 +5528,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -5272,7 +5578,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -5289,16 +5595,15 @@
# system may automatically choose what detectors to run. By default this may
# be all types, but may change over time as detectors are updated.
#
- # The special InfoType name "ALL_BASIC" can be used to trigger all detectors,
- # but may change over time as new InfoTypes are added. If you need precise
- # control and predictability as to what detectors are run you should specify
- # specific InfoTypes listed in the reference.
+ # If you need precise control and predictability as to what detectors are
+ # run you should specify specific InfoTypes listed in the reference,
+ # otherwise a default list will be used, which may change over time.
{ # Type of information detected by the API.
"name": "A String", # Name of the information type. Either a name of your choosing when
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
],
},
@@ -5312,7 +5617,7 @@
# OutputStorageConfig. Only a single instance of this action can be
# specified.
# Compatible with: Inspect, Risk
- "outputConfig": { # Cloud repository for storing output.
+ "outputConfig": { # Cloud repository for storing output. # Location to store findings outside of DLP.
"table": { # Message defining the location of a BigQuery table. A table is uniquely # Store findings in an existing table or a new table in an existing
# dataset. If table_id is not set a new one will be generated
# for you with the following format:
@@ -5329,8 +5634,8 @@
# quasi-identifiers, cannot store their results in the same table.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
@@ -5345,9 +5650,10 @@
# If unspecified, then all available columns will be used for a new table or
# an (existing) table with no schema, and no changes will be made to an
# existing table that has a schema.
+ # Only for use with external storage.
},
},
- "jobNotificationEmails": { # Enable email notification to project owners and editors on jobs's # Enable email notification to project owners and editors on job's
+ "jobNotificationEmails": { # Enable email notification to project owners and editors on job's # Enable email notification for project owners and editors on job's
# completion/failure.
# completion/failure.
},
@@ -5362,6 +5668,22 @@
# Only a single instance of this action can be specified.
# Compatible with: Inspect
},
+ "publishToStackdriver": { # Enable Stackdriver metric dlp.googleapis.com/finding_count. This # Enable Stackdriver metric dlp.googleapis.com/finding_count.
+ # will publish a metric to Stackdriver on each infotype requested and
+ # how many findings were found for it. CustomDetectors will be bucketed
+ # as 'Custom' under the Stackdriver label 'info_type'.
+ },
+ "publishFindingsToCloudDataCatalog": { # Publish findings of a DlpJob to Cloud Data Catalog. Labels summarizing the # Publish findings to Cloud Datahub.
+ # results of the DlpJob will be applied to the entry for the resource scanned
+ # in Cloud Data Catalog. Any labels previously written by another DlpJob will
+ # be deleted. InfoType naming patterns are strictly enforced when using this
+ # feature. Note that the findings will be persisted in Cloud Data Catalog
+ # storage and are governed by Data Catalog service-specific policy, see
+ # https://cloud.google.com/terms/service-terms
+ # Only a single instance of this action can be specified and only allowed if
+ # all resources being scanned are BigQuery tables.
+ # Compatible with: Inspect
+ },
"pubSub": { # Publish a message into given Pub/Sub topic when DlpJob has completed. The # Publish a notification to a pubsub topic.
# message contains a single field, `DlpJobName`, which is equal to the
# finished job's
@@ -5386,16 +5708,29 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
},
],
"totalEstimatedBytes": "A String", # Estimate of the number of bytes to process.
"processedBytes": "A String", # Total size in bytes that were processed.
+ "hybridStats": { # Statistics related to processing hybrid inspect requests. # Statistics related to the processing of hybrid inspect.
+ # Early access feature is in a pre-release state and might change or have
+ # limited support. For more information, see
+ # https://cloud.google.com/products#product-launch-stages.
+ "abortedCount": "A String", # The number of hybrid inspection requests aborted because the job ran
+ # out of quota or was ended before they could be processed.
+ "pendingCount": "A String", # The number of hybrid requests currently being processed. Only populated
+ # when called via method `getDlpJob`.
+ # A burst of traffic may cause hybrid inspect requests to be enqueued.
+ # Processing will take place as quickly as possible, but resource limitations
+ # may impact how long a request is enqueued for.
+ "processedCount": "A String", # The number of hybrid inspection requests processed within this job.
+ },
},
},
"riskDetails": { # Result of a risk analysis operation request. # Results from analyzing risk of a data source.
- "numericalStatsResult": { # Result of the numerical stats computation.
+ "numericalStatsResult": { # Result of the numerical stats computation. # Numerical stats result
"quantileValues": [ # List of 99 values that partition the set of field values into 100 equal
# sized buckets.
{ # Set of primitive values supported by the system.
@@ -5404,10 +5739,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5417,7 +5752,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5427,17 +5762,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"maxValue": { # Set of primitive values supported by the system. # Maximum value appearing in the column.
@@ -5446,10 +5781,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5459,7 +5794,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5469,17 +5804,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
"minValue": { # Set of primitive values supported by the system. # Minimum value appearing in the column.
# Note that for the purposes of inspection or transformation, the number
@@ -5487,10 +5822,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5500,7 +5835,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5510,20 +5845,20 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
- "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an
+ "kMapEstimationResult": { # Result of the reidentifiability analysis. Note that these results are an # K-map result
# estimation, not exact values.
"kMapEstimationHistogram": [ # The intervals [min_anonymity, max_anonymity] do not overlap. If a value
# doesn't correspond to any such interval, the associated frequency is
@@ -5552,10 +5887,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5565,7 +5900,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5575,17 +5910,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
},
@@ -5597,9 +5932,9 @@
},
],
},
- "kAnonymityResult": { # Result of the k-anonymity computation.
+ "kAnonymityResult": { # Result of the k-anonymity computation. # K-anonymity result
"equivalenceClassHistogramBuckets": [ # Histogram of k-anonymity equivalence classes.
- {
+ { # Histogram of k-anonymity equivalence classes.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value
@@ -5612,10 +5947,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5625,7 +5960,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5635,17 +5970,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"equivalenceClassSize": "A String", # Size of the equivalence class, for example number of rows with the
@@ -5659,9 +5994,9 @@
},
],
},
- "lDiversityResult": { # Result of the l-diversity computation.
+ "lDiversityResult": { # Result of the l-diversity computation. # L-diversity result
"sensitiveValueFrequencyHistogramBuckets": [ # Histogram of l-diversity equivalence class sensitive value frequencies.
- {
+ { # Histogram of l-diversity equivalence class sensitive value frequencies.
"bucketValues": [ # Sample of equivalence classes in this bucket. The total number of
# classes returned per bucket is capped at 20.
{ # The set of columns' values that share the same ldiversity value.
@@ -5674,10 +6009,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5687,7 +6022,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5697,17 +6032,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"topSensitiveValues": [ # Estimated frequencies of top sensitive values.
@@ -5719,10 +6054,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -5732,7 +6067,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -5742,17 +6077,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -5769,27 +6104,26 @@
],
},
"requestedPrivacyMetric": { # Privacy metric to compute for reidentification risk analysis. # Privacy metric to compute.
- "numericalStatsConfig": { # Compute numerical stats over an individual column, including
+ "numericalStatsConfig": { # Compute numerical stats over an individual column, including # Numerical stats
# min, max, and quantiles.
"field": { # General identifier of a data field in a storage service. # Field to compute numerical stats on. Supported types are
# integer, float, date, datetime, timestamp, time.
"name": "A String", # Name describing the field.
},
},
- "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what
+ "kMapEstimationConfig": { # Reidentifiability metric. This corresponds to a risk model similar to what # k-map
# is called "journalist risk" in the literature, except the attack dataset is
# statistically modeled instead of being perfectly known. This can be done
# using publicly available data (like the US Census), or using a custom
# statistical model (indicated as one or several BigQuery tables), or by
# extrapolating from the distribution of values in the input dataset.
- # A column with a semantic tag attached.
"regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
+ # Set if no column is tagged with a region-specific InfoType (like
# US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two columns can have the
- # same tag. [required]
- {
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two columns can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
"name": "A String", # Name describing the field.
},
"customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
@@ -5804,7 +6138,7 @@
# creating a CustomInfoType, or one of the names listed
# at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
# a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
+ # `[a-zA-Z0-9_]{1,64}`.
},
"inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
# the distribution of values in the input data
@@ -5829,34 +6163,33 @@
# If a tuple is present in the data but not in the auxiliary table, the
# corresponding relative frequency is assumed to be zero (and thus, the
# tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.
"datasetId": "A String", # Dataset ID of the table.
},
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # An auxiliary field.
+ },
+ ],
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
},
],
},
- "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk.
+ "lDiversityConfig": { # l-diversity metric, used for analysis of reidentification risk. # l-diversity
"sensitiveAttribute": { # General identifier of a data field in a storage service. # Sensitive field for computing the l-value.
"name": "A String", # Name describing the field.
},
@@ -5868,94 +6201,8 @@
},
],
},
- "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to
- # figure out that one given individual appears in a de-identified dataset.
- # Similarly to the k-map metric, we cannot compute δ-presence exactly without
- # knowing the attack dataset, so we use a statistical model instead.
- "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
- # Required if no column is tagged with a region-specific InfoType (like
- # US_ZIP_5) or a region code.
- "quasiIds": [ # Fields considered to be quasi-identifiers. No two fields can have the
- # same tag. [required]
- { # A column with a semantic tag attached.
- "field": { # General identifier of a data field in a storage service. # Identifies the column. [required]
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
- # indicate an auxiliary table that contains statistical information on
- # the possible values of this column (below).
- "infoType": { # Type of information detected by the API. # A column can be tagged with a InfoType to use the relevant public
- # dataset as a statistical model of population, if available. We
- # currently support US ZIP codes, region codes, ages and genders.
- # To programmatically obtain the list of supported InfoTypes, use
- # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
- "name": "A String", # Name of the information type. Either a name of your choosing when
- # creating a CustomInfoType, or one of the names listed
- # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
- # a built-in type. InfoType names should conform to the pattern
- # [a-zA-Z0-9_]{1,64}.
- },
- "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
- # the distribution of values in the input data
- # empty messages in your APIs. A typical example is to use it as the request
- # or the response type of an API method. For instance:
- #
- # service Foo {
- # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
- # }
- #
- # The JSON representation for `Empty` is empty JSON object `{}`.
- },
- },
- ],
- "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
- # used to tag a quasi-identifiers field must appear in exactly one
- # field of one auxiliary table.
- { # An auxiliary table containing statistical information on the relative
- # frequency of different quasi-identifiers values. It has one or several
- # quasi-identifiers columns, and one column that indicates the relative
- # frequency of each quasi-identifier tuple.
- # If a tuple is present in the data but not in the auxiliary table, the
- # corresponding relative frequency is assumed to be zero (and thus, the
- # tuple is highly reidentifiable).
- "relativeFrequency": { # General identifier of a data field in a storage service. # The relative frequency column must contain a floating-point number
- # between 0 and 1 (inclusive). Null values are assumed to be zero.
- # [required]
- "name": "A String", # Name describing the field.
- },
- "quasiIds": [ # Quasi-identifier columns. [required]
- { # A quasi-identifier column has a custom_tag, used to know which column
- # in the data corresponds to which column in the statistical model.
- "field": { # General identifier of a data field in a storage service.
- "name": "A String", # Name describing the field.
- },
- "customTag": "A String",
- },
- ],
- "table": { # Message defining the location of a BigQuery table. A table is uniquely # Auxiliary table location. [required]
- # identified by its project_id, dataset_id, and table_name. Within a query
- # a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
- "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
- # If omitted, project ID is inferred from the API call.
- "tableId": "A String", # Name of the table.
- "datasetId": "A String", # Dataset ID of the table.
- },
- },
- ],
- },
- "categoricalStatsConfig": { # Compute numerical stats over an individual column, including
- # number of distinct values and value count distribution.
- "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
- # supported except for arrays and structs. However, it may be more
- # informative to use NumericalStats when the field type is supported,
- # depending on the data.
- "name": "A String", # Name describing the field.
- },
- },
- "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk.
- "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Optional message indicating that multiple rows might be associated to a
+ "kAnonymityConfig": { # k-anonymity metric, used for analysis of reidentification risk. # K-anonymity
+ "entityId": { # An entity in a dataset is a field or set of fields that correspond to a # Message indicating that multiple rows might be associated to a
# single individual. If the same entity_id is associated to multiple
# quasi-identifier tuples over distinct rows, we consider the entire
# collection of tuples as the composite quasi-identifier. This collection
@@ -5983,10 +6230,97 @@
},
],
},
+ "categoricalStatsConfig": { # Compute numerical stats over an individual column, including # Categorical stats
+ # number of distinct values and value count distribution.
+ "field": { # General identifier of a data field in a storage service. # Field to compute categorical stats on. All column types are
+ # supported except for arrays and structs. However, it may be more
+ # informative to use NumericalStats when the field type is supported,
+ # depending on the data.
+ "name": "A String", # Name describing the field.
+ },
+ },
+ "deltaPresenceEstimationConfig": { # δ-presence metric, used to estimate how likely it is for an attacker to # delta-presence
+ # figure out that one given individual appears in a de-identified dataset.
+ # Similarly to the k-map metric, we cannot compute δ-presence exactly without
+ # knowing the attack dataset, so we use a statistical model instead.
+ "regionCode": "A String", # ISO 3166-1 alpha-2 region code to use in the statistical modeling.
+ # Set if no column is tagged with a region-specific InfoType (like
+ # US_ZIP_5) or a region code.
+ "quasiIds": [ # Required. Fields considered to be quasi-identifiers. No two fields can have the
+ # same tag.
+ { # A column with a semantic tag attached.
+ "field": { # General identifier of a data field in a storage service. # Required. Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ "infoType": { # Type of information detected by the API. # A column can be tagged with an InfoType to use the relevant public
+ # dataset as a statistical model of population, if available. We
+ # currently support US ZIP codes, region codes, ages and genders.
+ # To programmatically obtain the list of supported InfoTypes, use
+ # ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
+ "name": "A String", # Name of the information type. Either a name of your choosing when
+ # creating a CustomInfoType, or one of the names listed
+ # at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
+ # a built-in type. InfoType names should conform to the pattern
+ # `[a-zA-Z0-9_]{1,64}`.
+ },
+ "inferred": { # A generic empty message that you can re-use to avoid defining duplicated # If no semantic tag is indicated, we infer the statistical model from
+ # the distribution of values in the input data.
+ # empty messages in your APIs. A typical example is to use it as the request
+ # or the response type of an API method. For instance:
+ #
+ # service Foo {
+ # rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
+ # }
+ #
+ # The JSON representation for `Empty` is empty JSON object `{}`.
+ },
+ },
+ ],
+ "auxiliaryTables": [ # Several auxiliary tables can be used in the analysis. Each custom_tag
+ # used to tag a quasi-identifier field must appear in exactly one
+ # field of one auxiliary table.
+ { # An auxiliary table containing statistical information on the relative
+ # frequency of different quasi-identifier values. It has one or several
+ # quasi-identifier columns, and one column that indicates the relative
+ # frequency of each quasi-identifier tuple.
+ # If a tuple is present in the data but not in the auxiliary table, the
+ # corresponding relative frequency is assumed to be zero (and thus, the
+ # tuple is highly reidentifiable).
+ "relativeFrequency": { # General identifier of a data field in a storage service. # Required. The relative frequency column must contain a floating-point number
+ # between 0 and 1 (inclusive). Null values are assumed to be zero.
+ "name": "A String", # Name describing the field.
+ },
+ "quasiIds": [ # Required. Quasi-identifier columns.
+ { # A quasi-identifier column has a custom_tag, used to know which column
+ # in the data corresponds to which column in the statistical model.
+ "field": { # General identifier of a data field in a storage service. # Identifies the column.
+ "name": "A String", # Name describing the field.
+ },
+ "customTag": "A String", # A column can be tagged with a custom tag. In this case, the user must
+ # indicate an auxiliary table that contains statistical information on
+ # the possible values of this column (below).
+ },
+ ],
+ "table": { # Message defining the location of a BigQuery table. A table is uniquely # Required. Auxiliary table location.
+ # identified by its project_id, dataset_id, and table_name. Within a query
+ # a table is often referenced with a string in the format of:
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
+ "projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
+ # If omitted, project ID is inferred from the API call.
+ "tableId": "A String", # Name of the table.
+ "datasetId": "A String", # Dataset ID of the table.
+ },
+ },
+ ],
+ },
},
- "categoricalStatsResult": { # Result of the categorical stats computation.
+ "categoricalStatsResult": { # Result of the categorical stats computation. # Categorical stats result
"valueFrequencyHistogramBuckets": [ # Histogram of value frequencies in the column.
- {
+ { # Histogram of value frequencies in the column.
"bucketValues": [ # Sample of value frequencies in this bucket. The total number of
# values returned per bucket is capped at 20.
{ # A value of a field, including its frequency.
@@ -5997,10 +6331,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -6010,7 +6344,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -6020,17 +6354,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
},
],
@@ -6041,7 +6375,7 @@
},
],
},
- "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an
+ "deltaPresenceEstimationResult": { # Result of the δ-presence computation. Note that these results are an # Delta-presence result
# estimation, not exact values.
"deltaPresenceEstimationHistogram": [ # The intervals [min_probability, max_probability) do not overlap. If a
# value doesn't correspond to any such interval, the associated frequency
@@ -6070,10 +6404,10 @@
# as a UTF-8 encoded string. For example, if 'integer_value' is set to
# 123456789, the number of bytes would be counted as 9, even though an
# int64 only holds up to 8 bytes of data.
- "floatValue": 3.14,
- "timestampValue": "A String",
- "dayOfWeekValue": "A String",
- "timeValue": { # Represents a time of day. The date and time zone are either not significant
+ "floatValue": 3.14, # float
+ "timestampValue": "A String", # timestamp
+ "dayOfWeekValue": "A String", # day of week
+ "timeValue": { # Represents a time of day. The date and time zone are either not significant # time of day
# or are specified elsewhere. An API may choose to allow leap seconds. Related
# types are google.type.Date and `google.protobuf.Timestamp`.
"hours": 42, # Hours of day in 24 hour format. Should be from 0 to 23. An API may choose
@@ -6083,7 +6417,7 @@
# allow the value 60 if it allows leap-seconds.
"minutes": 42, # Minutes of hour of day. Must be from 0 to 59.
},
- "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day
+ "dateValue": { # Represents a whole or partial calendar date, e.g. a birthday. The time of day # date
# and time zone are either specified elsewhere or are not significant. The date
# is relative to the Proleptic Gregorian Calendar. This can represent:
#
@@ -6093,17 +6427,17 @@
# * A year and month value, with a zero day, e.g. a credit card expiration date
#
# Related types are google.type.TimeOfDay and `google.protobuf.Timestamp`.
- "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
- # a year.
+ "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
+ # month and day.
"day": 42, # Day of month. Must be from 1 to 31 and valid for the year and month, or 0
# if specifying a year by itself or a year and month where the day is not
# significant.
- "month": 42, # Month of year. Must be from 1 to 12, or 0 if specifying a year without a
- # month and day.
+ "year": 42, # Year of date. Must be from 1 to 9999, or 0 if specifying a date without
+ # a year.
},
- "stringValue": "A String",
- "booleanValue": True or False,
- "integerValue": "A String",
+ "stringValue": "A String", # string
+ "booleanValue": True or False, # boolean
+ "integerValue": "A String", # integer
},
],
"estimatedProbability": 3.14, # The estimated probability that a given individual sharing these
@@ -6126,8 +6460,8 @@
"requestedSourceTable": { # Message defining the location of a BigQuery table. A table is uniquely # Input dataset to compute metrics over.
# identified by its project_id, dataset_id, and table_name. Within a query
# a table is often referenced with a string in the format of:
- # `<project_id>:<dataset_id>.<table_id>` or
- # `<project_id>.<dataset_id>.<table_id>`.
+ # `<project_id>:<dataset_id>.<table_id>` or
+ # `<project_id>.<dataset_id>.<table_id>`.
"projectId": "A String", # The Google Cloud Platform project ID of the project containing the table.
# If omitted, project ID is inferred from the API call.
"tableId": "A String", # Name of the table.