chore: regens API reference docs (#889)
diff --git a/docs/dyn/dataflow_v1b3.projects.jobs.html b/docs/dyn/dataflow_v1b3.projects.jobs.html
index 20a94c9..52c745d 100644
--- a/docs/dyn/dataflow_v1b3.projects.jobs.html
+++ b/docs/dyn/dataflow_v1b3.projects.jobs.html
@@ -96,7 +96,7 @@
<code><a href="#aggregated_next">aggregated_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
- <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
+ <code><a href="#create">create(projectId, body=None, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
<code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
@@ -111,10 +111,10 @@
<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
- <code><a href="#snapshot">snapshot(projectId, jobId, body, x__xgafv=None)</a></code></p>
+ <code><a href="#snapshot">snapshot(projectId, jobId, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Snapshot the state of a streaming job.</p>
<p class="toc_element">
- <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
+ <code><a href="#update">update(projectId, jobId, body=None, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
<h3>Method Details</h3>
<div class="method">
@@ -141,8 +141,11 @@
Returns:
An object of the form:
- { # Response to a request to list Cloud Dataflow jobs. This may be a partial
- # response, depending on the page size in the ListJobsRequest.
+ { # Response to a request to list Cloud Dataflow jobs in a project. This might
+ # be a partial response, depending on the page size in the ListJobsRequest.
+ # However, if the project does not have any jobs, an instance of
+ # ListJobsResponse is not returned and the request's response
+ # body is empty {}.
"nextPageToken": "A String", # Set if there may be more results than fit in this response.
"failedLocation": [ # Zero or more messages describing the [regional endpoints]
# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
@@ -164,7 +167,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -211,9 +214,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -232,13 +235,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -249,11 +255,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -308,13 +311,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -325,11 +331,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -353,6 +356,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # default to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -386,10 +394,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -397,7 +406,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -411,6 +440,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -437,31 +483,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -472,10 +493,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -495,13 +528,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -519,20 +551,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -560,20 +583,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
+ # container instance with this image. If false (or unset) recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -670,13 +698,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -729,7 +757,7 @@
</div>
<div class="method">
- <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
+ <code class="details" id="create">create(projectId, body=None, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
<pre>Creates a Cloud Dataflow job.
To create a job, we recommend using `projects.locations.jobs.create` with a
@@ -740,7 +768,7 @@
Args:
projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
- body: object, The request body. (required)
+ body: object, The request body.
The object takes the form of:
{ # Defines a job to be run by the Cloud Dataflow service.
@@ -751,7 +779,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -798,9 +826,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -819,13 +847,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -836,11 +867,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -895,13 +923,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -912,11 +943,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -940,6 +968,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # default to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -973,10 +1006,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -984,7 +1018,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -998,6 +1052,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -1024,31 +1095,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -1059,10 +1105,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -1082,13 +1140,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -1106,20 +1163,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -1147,20 +1195,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A Docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends that the Dataflow service use only one core per SDK
+ # container instance with this image. If false (or unset), recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that the Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -1257,13 +1310,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -1319,7 +1372,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -1366,9 +1419,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -1387,13 +1440,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -1404,11 +1460,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -1463,13 +1516,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -1480,11 +1536,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -1508,6 +1561,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # this defaults to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -1541,10 +1599,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -1552,7 +1611,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -1566,6 +1645,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -1592,31 +1688,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -1627,10 +1698,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -1650,13 +1733,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -1674,20 +1756,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -1715,20 +1788,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A Docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends that the Dataflow service use only one core per SDK
+ # container instance with this image. If false (or unset), recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that the Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -1825,13 +1903,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -1900,7 +1978,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -1947,9 +2025,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -1968,13 +2046,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -1985,11 +2066,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -2044,13 +2122,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2061,11 +2142,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -2089,6 +2167,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # this defaults to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -2122,10 +2205,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -2133,7 +2217,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -2147,6 +2251,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -2173,31 +2294,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -2208,10 +2304,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -2231,13 +2339,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -2255,20 +2362,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -2296,20 +2394,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
+ # container instance with this image. If false (or unset) recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -2406,13 +2509,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -2506,8 +2609,8 @@
# metric is associated with, such as the name of a step or collection.
#
# For example, built-in counters associated with steps will have
- # context['step'] = <step-name>. Counters associated with PCollections
- # in the SDK will have context['pcollection'] = <pcollection-name>.
+ # context['step'] = <step-name>. Counters associated with PCollections
+ # in the SDK will have context['pcollection'] = <pcollection-name>.
"a_key": "A String",
},
},
@@ -2567,8 +2670,11 @@
Returns:
An object of the form:
- { # Response to a request to list Cloud Dataflow jobs. This may be a partial
- # response, depending on the page size in the ListJobsRequest.
+ { # Response to a request to list Cloud Dataflow jobs in a project. This might
+ # be a partial response, depending on the page size in the ListJobsRequest.
+ # However, if the project does not have any jobs, an instance of
+ # ListJobsResponse is not returned and the request's response
+ # body is empty {}.
"nextPageToken": "A String", # Set if there may be more results than fit in this response.
"failedLocation": [ # Zero or more messages describing the [regional endpoints]
# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
@@ -2590,7 +2696,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -2637,9 +2743,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -2658,13 +2764,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2675,11 +2784,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -2734,13 +2840,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2751,11 +2860,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -2779,6 +2885,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # default to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -2812,10 +2923,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -2823,7 +2935,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -2837,6 +2969,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -2863,31 +3012,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -2898,10 +3022,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -2921,13 +3057,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -2945,20 +3080,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -2986,20 +3112,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
+ # container instance with this image. If false (or unset) recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -3096,13 +3227,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -3155,18 +3286,20 @@
</div>
<div class="method">
- <code class="details" id="snapshot">snapshot(projectId, jobId, body, x__xgafv=None)</code>
+ <code class="details" id="snapshot">snapshot(projectId, jobId, body=None, x__xgafv=None)</code>
<pre>Snapshot the state of a streaming job.
Args:
projectId: string, The project which owns the job to be snapshotted. (required)
jobId: string, The job to be snapshotted. (required)
- body: object, The request body. (required)
+ body: object, The request body.
The object takes the form of:
{ # Request to create a snapshot of a job.
"location": "A String", # The location that contains this job.
"ttl": "A String", # TTL for the snapshot.
+ "description": "A String", # User specified description of the snapshot. May be empty.
+ "snapshotSources": True or False, # If true, perform snapshots for sources which support this.
}
x__xgafv: string, V1 error format.
@@ -3179,16 +3312,26 @@
{ # Represents a snapshot of a job.
"sourceJobId": "A String", # The job this snapshot was created from.
+ "diskSizeBytes": "A String", # The disk byte size of the snapshot. Only available for snapshots in READY
+ # state.
+ "description": "A String", # User specified description of the snapshot. May be empty.
"projectId": "A String", # The project this snapshot belongs to.
"creationTime": "A String", # The time this snapshot was created.
"state": "A String", # State of the snapshot.
"ttl": "A String", # The time after which this snapshot will be automatically deleted.
+ "pubsubMetadata": [ # PubSub snapshot metadata.
+ { # Represents a Pubsub snapshot.
+ "expireTime": "A String", # The expire time of the Pubsub snapshot.
+ "snapshotName": "A String", # The name of the Pubsub snapshot.
+ "topicName": "A String", # The name of the Pubsub topic.
+ },
+ ],
"id": "A String", # The unique ID of this snapshot.
}</pre>
</div>
<div class="method">
- <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
+ <code class="details" id="update">update(projectId, jobId, body=None, location=None, x__xgafv=None)</code>
<pre>Updates the state of an existing Cloud Dataflow job.
To update the state of an existing job, we recommend using
@@ -3200,7 +3343,7 @@
Args:
projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
jobId: string, The job ID. (required)
- body: object, The request body. (required)
+ body: object, The request body.
The object takes the form of:
{ # Defines a job to be run by the Cloud Dataflow service.
@@ -3211,7 +3354,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -3258,9 +3401,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -3279,13 +3422,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -3296,11 +3442,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -3355,13 +3498,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -3372,11 +3518,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -3400,6 +3543,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # default to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -3433,10 +3581,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -3444,7 +3593,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -3458,6 +3627,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -3484,31 +3670,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -3519,10 +3680,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -3542,13 +3715,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -3566,20 +3738,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -3607,20 +3770,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
+ # container instance with this image. If false (or unset) recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME} to
# this resource prefix, where {JOBNAME} is the value of the
@@ -3717,13 +3885,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
@@ -3777,7 +3945,7 @@
#
# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
- # * Both keys and values are additionally constrained to be <= 128 bytes in
+ # * Both keys and values are additionally constrained to be <= 128 bytes in
# size.
"a_key": "A String",
},
@@ -3824,9 +3992,9 @@
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"projectId": "A String", # Project accessed in the connection.
- "dataset": "A String", # Dataset accessed in the connection.
- "table": "A String", # Table accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "table": "A String", # Table accessed in the connection.
+ "dataset": "A String", # Dataset accessed in the connection.
},
],
},
@@ -3845,13 +4013,16 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -3862,11 +4033,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -3921,13 +4089,16 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
# com.mypackage.MyDoFn as the java_class_name value.
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -3938,11 +4109,8 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"int64Value": "A String", # Contains value if the data is of int64 type.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
},
],
},
@@ -3966,6 +4134,11 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
+ "workerRegion": "A String", # The Compute Engine region
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1". Mutually exclusive
+ # with worker_zone. If neither worker_region nor worker_zone is specified,
+ # default to the control plane's region.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
"a_key": "", # Properties of the object.
@@ -3999,10 +4172,11 @@
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
- "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
- # unspecified, the service will attempt to choose a reasonable
- # default. This should be in the form of the API service name,
- # e.g. "compute.googleapis.com".
+ "workerZone": "A String", # The Compute Engine zone
+ # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
+ # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
+ # with worker_region. If neither worker_region nor worker_zone is specified,
+ # a zone in the control plane's region is chosen based on available capacity.
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
@@ -4010,7 +4184,27 @@
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ },
"diskSourceImage": "A String", # Fully qualified source image for disks.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -4024,6 +4218,23 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "vmId": "A String", # The ID string of the VM.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -4050,31 +4261,6 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
- "vmId": "A String", # The ID string of the VM.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
@@ -4085,10 +4271,22 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
},
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
@@ -4108,13 +4306,12 @@
"name": "A String", # The name of the package.
},
],
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
@@ -4132,20 +4329,11 @@
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"dataDisks": [ # Data disks that are used by a VM in this workflow.
@@ -4173,20 +4361,25 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines an SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends that the Dataflow service use only one core per SDK
+ # container instance with this image. If false (or unset), recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that the Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
},
],
+ "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
+ # unspecified, the service will attempt to choose a reasonable
+ # default. This should be in the form of the API service name,
+ # e.g. "compute.googleapis.com".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME}" to
# this resource prefix, where {JOBNAME} is the value of the
@@ -4283,13 +4476,13 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID