Regen docs (#373)
diff --git a/docs/dyn/dataflow_v1b3.projects.jobs.html b/docs/dyn/dataflow_v1b3.projects.jobs.html
index 0c87e36..75c7d0c 100644
--- a/docs/dyn/dataflow_v1b3.projects.jobs.html
+++ b/docs/dyn/dataflow_v1b3.projects.jobs.html
@@ -156,13 +156,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -216,49 +215,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -271,24 +227,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -315,16 +264,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -334,33 +281,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -386,13 +326,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -408,9 +413,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -418,6 +420,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -428,7 +431,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -440,9 +445,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -450,6 +452,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -460,7 +463,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -482,19 +487,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -536,26 +541,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -575,12 +570,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.
@@ -646,13 +646,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -706,49 +705,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -761,24 +717,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -805,16 +754,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -824,33 +771,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -876,13 +816,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -898,9 +903,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -908,6 +910,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -918,7 +921,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -930,9 +935,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -940,6 +942,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -950,7 +953,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -972,19 +977,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -1026,26 +1031,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -1065,12 +1060,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.
@@ -1143,13 +1143,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -1203,49 +1202,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -1258,24 +1214,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -1302,16 +1251,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -1321,33 +1268,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -1373,13 +1313,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -1395,9 +1400,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -1405,6 +1407,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -1415,7 +1418,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -1427,9 +1432,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -1437,6 +1439,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -1447,7 +1450,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -1469,19 +1474,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -1523,26 +1528,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -1562,12 +1557,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.
@@ -1685,11 +1685,6 @@
{ # Response to a request to list Cloud Dataflow jobs. This may be a partial
# response, depending on the page size in the ListJobsRequest.
"nextPageToken": "A String", # Set if there may be more results than fit in this response.
- "failedLocation": [ # Zero or more messages describing locations that failed to respond.
- { # Indicates which location failed to respond to a request for data.
- "name": "A String", # The name of the failed location.
- },
- ],
"jobs": [ # A subset of the requested job information.
{ # Defines a job to be run by the Cloud Dataflow service.
"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
@@ -1730,13 +1725,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -1790,49 +1784,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -1845,24 +1796,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -1889,16 +1833,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -1908,33 +1850,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -1960,13 +1895,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -1982,9 +1982,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -1992,6 +1989,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2002,7 +2000,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -2014,9 +2014,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -2024,6 +2021,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2034,7 +2032,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -2056,19 +2056,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -2110,26 +2110,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -2149,12 +2139,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.
@@ -2170,6 +2165,11 @@
},
},
],
+ "failedLocation": [ # Zero or more messages describing locations that failed to respond.
+ { # Indicates which location failed to respond to a request for data.
+ "name": "A String", # The name of the failed location.
+ },
+ ],
}</pre>
</div>
@@ -2236,13 +2236,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -2296,49 +2295,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -2351,24 +2307,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -2395,16 +2344,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -2414,33 +2361,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -2466,13 +2406,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -2488,9 +2493,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -2498,6 +2500,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2508,7 +2511,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -2520,9 +2525,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -2530,6 +2532,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2540,7 +2543,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -2562,19 +2567,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -2616,26 +2621,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -2655,12 +2650,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.
@@ -2724,13 +2724,12 @@
# size.
"a_key": "A String",
},
- "location": "A String", # The location that contains this job.
- "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
- # Cloud Dataflow service.
"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
# corresponding name prefixes of the new job.
"a_key": "A String",
},
+ "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
+ # Cloud Dataflow service.
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
"version": { # A structure describing which components and their versions of the service
# are required in order to run the job.
@@ -2784,49 +2783,6 @@
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
@@ -2839,24 +2795,17 @@
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "vmId": "A String", # The ID string of the VM.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "vmId": "A String", # The ID string of the VM.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
- "languageHint": "A String", # The suggested backend language.
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "logDir": "A String", # The directory on the VM to store logs.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
@@ -2883,16 +2832,14 @@
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "languageHint": "A String", # The suggested backend language.
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "harnessCommand": "A String", # The command to launch the worker harness.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
@@ -2902,33 +2849,26 @@
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
- },
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
#
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
},
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
@@ -2954,13 +2894,78 @@
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ "name": "A String", # The name of the package.
+ },
+ ],
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
},
],
},
+ "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
+ # of the job it replaced.
+ #
+ # When sending a `CreateJobRequest`, you can update a job by specifying it
+ # here. The job named here is stopped, and its intermediate state is
+ # transferred to this job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -2976,9 +2981,6 @@
],
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -2986,6 +2988,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -2996,7 +2999,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -3008,9 +3013,6 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "key": "A String", # The key identifying the display data.
- # This is intended to be used as a label for the display data
- # when viewed in a dax monitoring system.
"shortStrValue": "A String", # A possible additional shorter value to display.
# For example a java_class_name_value of com.mypackage.MyDoFn
# will be stored with MyDoFn as the short_str_value and
@@ -3018,6 +3020,7 @@
# short_str_value can be displayed and java_class_name_value
# will be displayed as a tooltip.
"timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "durationValue": "A String", # Contains value if the data is of duration type.
"url": "A String", # An optional full URL.
"floatValue": 3.14, # Contains value if the data is of float type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
@@ -3028,7 +3031,9 @@
"label": "A String", # An optional label to display in a dax UI for the element.
"boolValue": True or False, # Contains value if the data is of a boolean type.
"strValue": "A String", # Contains value if the data is of string type.
- "durationValue": "A String", # Contains value if the data is of duration type.
+ "key": "A String", # The key identifying the display data.
+ # This is intended to be used as a label for the display data
+ # when viewed in a dax monitoring system.
"int64Value": "A String", # Contains value if the data is of int64 type.
},
],
@@ -3050,19 +3055,19 @@
"outputSource": [ # Output sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"inputSource": [ # Input sources for this stage.
{ # Description of an input or output of an execution stage.
"userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
# source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
},
],
"componentTransform": [ # Transforms that comprise this execution stage.
@@ -3104,26 +3109,16 @@
# Note that the Cloud Dataflow service may be used to run many different
# types of jobs, not just Map-Reduce.
"kind": "A String", # The kind of step in the Cloud Dataflow job.
+ "name": "A String", # The name that identifies the step. This must be unique for each
+ # step with respect to all other steps in the Cloud Dataflow job.
"properties": { # Named properties associated with the step. Each kind of
# predefined step has its own required set of properties.
# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
"a_key": "", # Properties of the object.
},
- "name": "A String", # The name that identifies the step. This must be unique for each
- # step with respect to all other steps in the Cloud Dataflow job.
},
],
- "currentState": "A String", # The current state of the job.
- #
- # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
- # specified.
- #
- # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
- # terminal state. After a job has reached a terminal state, no
- # further state updates may be made.
- #
- # This field may be mutated by the Cloud Dataflow service;
- # callers cannot mutate it.
+ "location": "A String", # The location that contains this job.
"tempFiles": [ # A set of files the system should be aware of that are used
# for temporary storage. These temporary files will be
# removed on job completion.
@@ -3143,12 +3138,17 @@
#
# This field is set by the Cloud Dataflow service when the Job is
# created, and is immutable for the life of the job.
- "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
- # of the job it replaced.
+ "currentState": "A String", # The current state of the job.
#
- # When sending a `CreateJobRequest`, you can update a job by specifying it
- # here. The job named here is stopped, and its intermediate state is
- # transferred to this job.
+ # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
+ # specified.
+ #
+ # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
+ # terminal state. After a job has reached a terminal state, no
+ # further state updates may be made.
+ #
+ # This field may be mutated by the Cloud Dataflow service;
+ # callers cannot mutate it.
"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
# isn't contained in the submitted job.
"stages": { # A mapping from each stage to the information about that stage.