blob: 8055b70d955bae142f689c08c5c1c10ef31aa618 [file] [log] [blame]
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13}
14
15body {
16 font-size: 13px;
17 padding: 1em;
18}
19
20h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23}
24
25h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28}
29
30h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34}
35
36pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42 margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54 margin-top: 0.5em;
55}
56
57.firstline {
58 margin-left: 2em;
59}
60
61.method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66}
67
68.details {
69 font-weight: bold;
70 font-size: 14px;
71}
72
73</style>
74
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070075<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070076<h2>Instance Methods</h2>
77<p class="toc_element">
Dan O'Mearadd494642020-05-01 07:42:23 -070078 <code><a href="#create">create(projectId, body=None, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040079<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
Bu Sun Kim65020912020-05-20 12:08:20 -070081 <code><a href="#get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040082<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
Bu Sun Kim65020912020-05-20 12:08:20 -070084 <code><a href="#launch">launch(projectId, body=None, validateOnly=None, gcsPath=None, location=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040085<p class="firstline">Launch a template.</p>
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070086<h3>Method Details</h3>
87<div class="method">
Dan O'Mearadd494642020-05-01 07:42:23 -070088 <code class="details" id="create">create(projectId, body=None, x__xgafv=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040089 <pre>Creates a Cloud Dataflow job from a template.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070090
91Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040092 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Dan O'Mearadd494642020-05-01 07:42:23 -070093 body: object, The request body.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070094 The object takes the form of:
95
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040096{ # A request to create a Cloud Dataflow job from a template.
Bu Sun Kim65020912020-05-20 12:08:20 -070097 &quot;environment&quot;: { # The environment values to set at runtime. # The runtime environment for the job.
98 &quot;workerRegion&quot;: &quot;A String&quot;, # The Compute Engine region
Dan O'Mearadd494642020-05-01 07:42:23 -070099 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
Bu Sun Kim65020912020-05-20 12:08:20 -0700100 # which worker processing should occur, e.g. &quot;us-west1&quot;. Mutually exclusive
Dan O'Mearadd494642020-05-01 07:42:23 -0700101 # with worker_zone. If neither worker_region nor worker_zone is specified,
Bu Sun Kim65020912020-05-20 12:08:20 -0700102 # default to the control plane&#x27;s region.
103 &quot;numWorkers&quot;: 42, # The initial number of Google Compute Engine instances for the job.
104 &quot;zone&quot;: &quot;A String&quot;, # The Compute Engine [availability
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400105 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400106 # for launching worker instances to run your pipeline.
Dan O'Mearadd494642020-05-01 07:42:23 -0700107 # In the future, worker_zone will take precedence.
Bu Sun Kim65020912020-05-20 12:08:20 -0700108 &quot;workerZone&quot;: &quot;A String&quot;, # The Compute Engine zone
109 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
110 # which worker processing should occur, e.g. &quot;us-west1-a&quot;. Mutually exclusive
111 # with worker_region. If neither worker_region nor worker_zone is specified,
112 # a zone in the control plane&#x27;s region is chosen based on available capacity.
113 # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
114 &quot;additionalUserLabels&quot;: { # Additional user labels to be specified for the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700115 # Keys and values should follow the restrictions specified in the [labeling
116 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
117 # page.
Bu Sun Kim65020912020-05-20 12:08:20 -0700118 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700119 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700120 &quot;additionalExperiments&quot;: [ # Additional experiment flags for the job.
121 &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700122 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700123 &quot;maxWorkers&quot;: 42, # The maximum number of Google Compute Engine instances to be made
124 # available to your pipeline during execution, from 1 to 1000.
125 &quot;serviceAccountEmail&quot;: &quot;A String&quot;, # The email address of the service account to run the job as.
126 &quot;machineType&quot;: &quot;A String&quot;, # The machine type to use for the job. Defaults to the value from the
127 # template if not specified.
128 &quot;subnetwork&quot;: &quot;A String&quot;, # Subnetwork to which VMs will be assigned, if desired. Expected to be of
129 # the form &quot;regions/REGION/subnetworks/SUBNETWORK&quot;.
130 &quot;ipConfiguration&quot;: &quot;A String&quot;, # Configuration for VM IPs.
131 &quot;kmsKeyName&quot;: &quot;A String&quot;, # Optional. Name for the Cloud KMS key for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700132 # Key format is:
133 # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
Bu Sun Kim65020912020-05-20 12:08:20 -0700134 &quot;bypassTempDirValidation&quot;: True or False, # Whether to bypass the safety checks for the job&#x27;s temporary directory.
135 # Use with caution.
136 &quot;tempLocation&quot;: &quot;A String&quot;, # The Cloud Storage path to use for temporary files.
137 # Must be a valid Cloud Storage URL, beginning with `gs://`.
138 &quot;network&quot;: &quot;A String&quot;, # Network to which VMs will be assigned. If empty or unspecified,
139 # the service will use the network &quot;default&quot;.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800140 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700141 &quot;location&quot;: &quot;A String&quot;, # The [regional endpoint]
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700142 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
143 # which to direct the request.
Bu Sun Kim65020912020-05-20 12:08:20 -0700144 &quot;parameters&quot;: { # The runtime parameters to pass to the job.
145 &quot;a_key&quot;: &quot;A String&quot;,
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700146 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700147 &quot;jobName&quot;: &quot;A String&quot;, # Required. The job name to use for the created job.
148 &quot;gcsPath&quot;: &quot;A String&quot;, # Required. A Cloud Storage path to the template from which to
149 # create the job.
150 # Must be a valid Cloud Storage URL, beginning with `gs://`.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700151 }
152
153 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400154 Allowed values
155 1 - v1 error format
156 2 - v2 error format
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700157
158Returns:
159 An object of the form:
160
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400161 { # Defines a job to be run by the Cloud Dataflow service.
Bu Sun Kim65020912020-05-20 12:08:20 -0700162 &quot;clientRequestId&quot;: &quot;A String&quot;, # The client&#x27;s unique identifier of the job, re-used across retried attempts.
163 # If this field is set, the service will ensure its uniqueness.
164 # The request to create a job will fail if the service has knowledge of a
165 # previously submitted job with the same client&#x27;s ID and job name.
166 # The caller may use this field to ensure idempotence of job
167 # creation across retried attempts to create a job.
168 # By default, the field is empty and, in that case, the service ignores it.
169 &quot;id&quot;: &quot;A String&quot;, # The unique ID of this job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700170 #
171 # This field is set by the Cloud Dataflow service when the Job is
172 # created, and is immutable for the life of the job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700173 &quot;currentStateTime&quot;: &quot;A String&quot;, # The timestamp associated with the current state.
174 &quot;transformNameMapping&quot;: { # The map of transform name prefixes of the job to be replaced to the
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700175 # corresponding name prefixes of the new job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700176 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700177 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700178 &quot;environment&quot;: { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
179 &quot;sdkPipelineOptions&quot;: { # The Cloud Dataflow SDK pipeline options specified by the user. These
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700180 # options are passed through the service and are used to recreate the
181 # SDK pipeline options on the worker in a language agnostic and platform
182 # independent way.
Bu Sun Kim65020912020-05-20 12:08:20 -0700183 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700184 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700185 &quot;flexResourceSchedulingGoal&quot;: &quot;A String&quot;, # Which Flexible Resource Scheduling mode to run in.
186 &quot;workerPools&quot;: [ # The worker pools. At least one &quot;harness&quot; worker pool must be
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700187 # specified in order for the job to have workers.
188 { # Describes one particular pool of Cloud Dataflow workers to be
189 # instantiated by the Cloud Dataflow service in order to perform the
190 # computations required by a job. Note that a workflow job may use
191 # multiple pools, in order to match the various computational
192 # requirements of the various stages of the job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700193 &quot;defaultPackageSet&quot;: &quot;A String&quot;, # The default package set to install. This allows the service to
194 # select a default set of packages which are useful to worker
195 # harnesses written in a particular language.
196 &quot;network&quot;: &quot;A String&quot;, # Network to which VMs will be assigned. If empty or unspecified,
197 # the service will use the network &quot;default&quot;.
198 &quot;zone&quot;: &quot;A String&quot;, # Zone to run the worker pools in. If empty or unspecified, the service
Dan O'Mearadd494642020-05-01 07:42:23 -0700199 # will attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700200 &quot;numWorkers&quot;: 42, # Number of Google Compute Engine workers in this pool needed to
201 # execute the job. If zero or unspecified, the service will
202 # attempt to choose a reasonable default.
203 &quot;numThreadsPerWorker&quot;: 42, # The number of threads per worker harness. If empty or unspecified, the
Dan O'Mearadd494642020-05-01 07:42:23 -0700204 # service will choose a number of threads (according to the number of cores
205 # on the selected machine type for batch, or 1 by convention for streaming).
Bu Sun Kim65020912020-05-20 12:08:20 -0700206 &quot;diskSourceImage&quot;: &quot;A String&quot;, # Fully qualified source image for disks.
207 &quot;packages&quot;: [ # Packages to be installed on workers.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700208 { # The packages that must be installed in order for a worker to run the
209 # steps of the Cloud Dataflow job that will be assigned to its worker
210 # pool.
211 #
212 # This is the mechanism by which the Cloud Dataflow SDK causes code to
213 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
Bu Sun Kim65020912020-05-20 12:08:20 -0700214 # might use this to install jars containing the user&#x27;s code and all of the
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700215 # various dependencies (libraries, data files, etc.) required in order
216 # for that code to run.
Bu Sun Kim65020912020-05-20 12:08:20 -0700217 &quot;location&quot;: &quot;A String&quot;, # The resource to read the package from. The supported resource type is:
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700218 #
219 # Google Cloud Storage:
220 #
221 # storage.googleapis.com/{bucket}
222 # bucket.storage.googleapis.com/
Bu Sun Kim65020912020-05-20 12:08:20 -0700223 &quot;name&quot;: &quot;A String&quot;, # The name of the package.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700224 },
225 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700226 &quot;teardownPolicy&quot;: &quot;A String&quot;, # Sets the policy for determining when to tear down the worker pool.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700227 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
228 # `TEARDOWN_NEVER`.
229 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
230 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
231 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
232 # down.
233 #
234 # If the workers are not torn down by the service, they will
235 # continue to run and use Google Compute Engine VM resources in the
Bu Sun Kim65020912020-05-20 12:08:20 -0700236 # user&#x27;s project until they are explicitly terminated by the user.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700237 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
238 # policy except for small, manually supervised test jobs.
239 #
240 # If unknown or unspecified, the service will attempt to choose a reasonable
241 # default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700242 &quot;onHostMaintenance&quot;: &quot;A String&quot;, # The action to take on host maintenance, as defined by the Google
243 # Compute Engine API.
244 &quot;poolArgs&quot;: { # Extra arguments for this worker pool.
245 &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
246 },
247 &quot;diskSizeGb&quot;: 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
Dan O'Mearadd494642020-05-01 07:42:23 -0700248 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700249 &quot;workerHarnessContainerImage&quot;: &quot;A String&quot;, # Required. Docker container image that executes the Cloud Dataflow worker
250 # harness, residing in Google Container Registry.
251 #
252 # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
253 &quot;diskType&quot;: &quot;A String&quot;, # Type of root disk for VMs. If empty or unspecified, the service will
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700254 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700255 &quot;machineType&quot;: &quot;A String&quot;, # Machine type (e.g. &quot;n1-standard-1&quot;). If empty or unspecified, the
256 # service will attempt to choose a reasonable default.
257 &quot;kind&quot;: &quot;A String&quot;, # The kind of the worker pool; currently only `harness` and `shuffle`
258 # are supported.
259 &quot;dataDisks&quot;: [ # Data disks that are used by a VM in this workflow.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700260 { # Describes the data disk used by a workflow job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700261 &quot;sizeGb&quot;: 42, # Size of disk in GB. If zero or unspecified, the service will
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700262 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700263 &quot;diskType&quot;: &quot;A String&quot;, # Disk storage type, as defined by Google Compute Engine. This
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700264 # must be a disk type appropriate to the project and zone in which
265 # the workers will run. If unknown or unspecified, the service
266 # will attempt to choose a reasonable default.
267 #
268 # For example, the standard persistent disk type is a resource name
Bu Sun Kim65020912020-05-20 12:08:20 -0700269 # typically ending in &quot;pd-standard&quot;. If SSD persistent disks are
270 # available, the resource name typically ends with &quot;pd-ssd&quot;. The
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700271 # actual valid values are defined by the Google Compute Engine API,
272 # not by the Cloud Dataflow API; consult the Google Compute Engine
273 # documentation for more information about determining the set of
274 # available disk types for a particular project and zone.
275 #
276 # Google Compute Engine Disk types are local to a particular
277 # project in a particular zone, and so the resource name will
278 # typically look something like this:
279 #
280 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Bu Sun Kim65020912020-05-20 12:08:20 -0700281 &quot;mountPoint&quot;: &quot;A String&quot;, # Directory in a VM where disk is mounted.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700282 },
283 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700284 &quot;sdkHarnessContainerImages&quot;: [ # Set of SDK harness containers needed to execute this pipeline. This will
Dan O'Mearadd494642020-05-01 07:42:23 -0700285 # only be set in the Fn API path. For non-cross-language pipelines this
286 # should have only one entry. Cross-language pipelines will have two or more
287 # entries.
288 { # Defines a SDK harness container for executing Dataflow pipelines.
Bu Sun Kim65020912020-05-20 12:08:20 -0700289 &quot;containerImage&quot;: &quot;A String&quot;, # A docker container image that resides in Google Container Registry.
290 &quot;useSingleCorePerContainer&quot;: True or False, # If true, recommends the Dataflow service to use only one core per SDK
Dan O'Mearadd494642020-05-01 07:42:23 -0700291 # container instance with this image. If false (or unset) recommends using
292 # more than one core per SDK container instance with this image for
293 # efficiency. Note that Dataflow service may choose to override this property
294 # if needed.
295 },
296 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700297 &quot;subnetwork&quot;: &quot;A String&quot;, # Subnetwork to which VMs will be assigned, if desired. Expected to be of
298 # the form &quot;regions/REGION/subnetworks/SUBNETWORK&quot;.
299 &quot;ipConfiguration&quot;: &quot;A String&quot;, # Configuration for VM IPs.
300 &quot;taskrunnerSettings&quot;: { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
301 # using the standard Dataflow task runner. Users should ignore
302 # this field.
303 &quot;alsologtostderr&quot;: True or False, # Whether to also send taskrunner log info to stderr.
304 &quot;taskGroup&quot;: &quot;A String&quot;, # The UNIX group ID on the worker VM to use for tasks launched by
305 # taskrunner; e.g. &quot;wheel&quot;.
306 &quot;harnessCommand&quot;: &quot;A String&quot;, # The command to launch the worker harness.
307 &quot;logDir&quot;: &quot;A String&quot;, # The directory on the VM to store logs.
308 &quot;oauthScopes&quot;: [ # The OAuth2 scopes to be requested by the taskrunner in order to
309 # access the Cloud Dataflow API.
310 &quot;A String&quot;,
311 ],
312 &quot;dataflowApiVersion&quot;: &quot;A String&quot;, # The API version of endpoint, e.g. &quot;v1b3&quot;
313 &quot;logUploadLocation&quot;: &quot;A String&quot;, # Indicates where to put logs. If this is not specified, the logs
314 # will not be uploaded.
315 #
316 # The supported resource type is:
317 #
318 # Google Cloud Storage:
319 # storage.googleapis.com/{bucket}/{object}
320 # bucket.storage.googleapis.com/{object}
321 &quot;streamingWorkerMainClass&quot;: &quot;A String&quot;, # The streaming worker main class name.
322 &quot;workflowFileName&quot;: &quot;A String&quot;, # The file to store the workflow in.
323 &quot;baseTaskDir&quot;: &quot;A String&quot;, # The location on the worker for task-specific subdirectories.
324 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the taskrunner should use for
325 # temporary storage.
326 #
327 # The supported resource type is:
328 #
329 # Google Cloud Storage:
330 # storage.googleapis.com/{bucket}/{object}
331 # bucket.storage.googleapis.com/{object}
332 &quot;commandlinesFileName&quot;: &quot;A String&quot;, # The file to store preprocessing commands in.
333 &quot;languageHint&quot;: &quot;A String&quot;, # The suggested backend language.
334 &quot;baseUrl&quot;: &quot;A String&quot;, # The base URL for the taskrunner to use when accessing Google Cloud APIs.
335 #
336 # When workers access Google Cloud APIs, they logically do so via
337 # relative URLs. If this field is specified, it supplies the base
338 # URL to use for resolving these relative URLs. The normative
339 # algorithm used is defined by RFC 1808, &quot;Relative Uniform Resource
340 # Locators&quot;.
341 #
342 # If not specified, the default value is &quot;http://www.googleapis.com/&quot;
343 &quot;logToSerialconsole&quot;: True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
344 # console.
345 &quot;continueOnException&quot;: True or False, # Whether to continue taskrunner if an exception is hit.
346 &quot;parallelWorkerSettings&quot;: { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
347 &quot;baseUrl&quot;: &quot;A String&quot;, # The base URL for accessing Google Cloud APIs.
348 #
349 # When workers access Google Cloud APIs, they logically do so via
350 # relative URLs. If this field is specified, it supplies the base
351 # URL to use for resolving these relative URLs. The normative
352 # algorithm used is defined by RFC 1808, &quot;Relative Uniform Resource
353 # Locators&quot;.
354 #
355 # If not specified, the default value is &quot;http://www.googleapis.com/&quot;
356 &quot;reportingEnabled&quot;: True or False, # Whether to send work progress updates to the service.
357 &quot;servicePath&quot;: &quot;A String&quot;, # The Cloud Dataflow service path relative to the root URL, for example,
358 # &quot;dataflow/v1b3/projects&quot;.
359 &quot;shuffleServicePath&quot;: &quot;A String&quot;, # The Shuffle service path relative to the root URL, for example,
360 # &quot;shuffle/v1beta1&quot;.
361 &quot;workerId&quot;: &quot;A String&quot;, # The ID of the worker running this pipeline.
362 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the system should use for temporary
363 # storage.
364 #
365 # The supported resource type is:
366 #
367 # Google Cloud Storage:
368 #
369 # storage.googleapis.com/{bucket}/{object}
370 # bucket.storage.googleapis.com/{object}
371 },
372 &quot;vmId&quot;: &quot;A String&quot;, # The ID string of the VM.
373 &quot;taskUser&quot;: &quot;A String&quot;, # The UNIX user ID on the worker VM to use for tasks launched by
374 # taskrunner; e.g. &quot;root&quot;.
375 },
376 &quot;autoscalingSettings&quot;: { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
377 &quot;maxNumWorkers&quot;: 42, # The maximum number of workers to cap scaling at.
378 &quot;algorithm&quot;: &quot;A String&quot;, # The algorithm to use for autoscaling.
379 },
380 &quot;metadata&quot;: { # Metadata to set on the Google Compute Engine VMs.
381 &quot;a_key&quot;: &quot;A String&quot;,
382 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700383 },
384 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700385 &quot;dataset&quot;: &quot;A String&quot;, # The dataset for the current project where various workflow
386 # related tables are stored.
387 #
388 # The supported resource type is:
389 #
390 # Google BigQuery:
391 # bigquery.googleapis.com/{dataset}
392 &quot;internalExperiments&quot;: { # Experimental settings.
393 &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
394 },
395 &quot;workerRegion&quot;: &quot;A String&quot;, # The Compute Engine region
396 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
397 # which worker processing should occur, e.g. &quot;us-west1&quot;. Mutually exclusive
398 # with worker_zone. If neither worker_region nor worker_zone is specified,
399 # default to the control plane&#x27;s region.
400 &quot;serviceKmsKeyName&quot;: &quot;A String&quot;, # If set, contains the Cloud KMS key identifier used to encrypt data
401 # at rest, AKA a Customer Managed Encryption Key (CMEK).
402 #
403 # Format:
404 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
405 &quot;userAgent&quot;: { # A description of the process that generated the request.
406 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
407 },
408 &quot;workerZone&quot;: &quot;A String&quot;, # The Compute Engine zone
409 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
410 # which worker processing should occur, e.g. &quot;us-west1-a&quot;. Mutually exclusive
411 # with worker_region. If neither worker_region nor worker_zone is specified,
412 # a zone in the control plane&#x27;s region is chosen based on available capacity.
413 &quot;clusterManagerApiService&quot;: &quot;A String&quot;, # The type of cluster manager API to use. If unknown or
Dan O'Mearadd494642020-05-01 07:42:23 -0700414 # unspecified, the service will attempt to choose a reasonable
415 # default. This should be in the form of the API service name,
Bu Sun Kim65020912020-05-20 12:08:20 -0700416 # e.g. &quot;compute.googleapis.com&quot;.
417 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the system should use for temporary
418 # storage. The system will append the suffix &quot;/temp-{JOBNAME}&quot; to
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700419 # this resource prefix, where {JOBNAME} is the value of the
420 # job_name field. The resulting bucket and object prefix is used
421 # as the prefix of the resources used to store temporary data
422 # needed during the job execution. NOTE: This will override the
423 # value in taskrunner_settings.
424 # The supported resource type is:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400425 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700426 # Google Cloud Storage:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400427 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700428 # storage.googleapis.com/{bucket}/{object}
429 # bucket.storage.googleapis.com/{object}
Bu Sun Kim65020912020-05-20 12:08:20 -0700430 &quot;experiments&quot;: [ # The list of experiments to enable.
431 &quot;A String&quot;,
432 ],
433 &quot;version&quot;: { # A structure describing which components and their versions of the service
434 # are required in order to run the job.
435 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
436 },
437 &quot;serviceAccountEmail&quot;: &quot;A String&quot;, # Identity to run virtual machines as. Defaults to the default account.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700438 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700439 &quot;stageStates&quot;: [ # This field may be mutated by the Cloud Dataflow service;
440 # callers cannot mutate it.
441 { # A message describing the state of a particular execution stage.
442 &quot;executionStageName&quot;: &quot;A String&quot;, # The name of the execution stage.
443 &quot;currentStateTime&quot;: &quot;A String&quot;, # The time at which the stage transitioned to this state.
444 &quot;executionStageState&quot;: &quot;A String&quot;, # Execution stage states allow the same set of values as JobState.
445 },
446 ],
447 &quot;jobMetadata&quot;: { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
448 # by the metadata values provided here. Populated for ListJobs and all GetJob
449 # views SUMMARY and higher.
450 # ListJob response and Job SUMMARY view.
451 &quot;bigTableDetails&quot;: [ # Identification of a BigTable source used in the Dataflow job.
452 { # Metadata for a BigTable connector used by the job.
453 &quot;tableId&quot;: &quot;A String&quot;, # TableId accessed in the connection.
454 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
455 &quot;instanceId&quot;: &quot;A String&quot;, # InstanceId accessed in the connection.
456 },
457 ],
458 &quot;spannerDetails&quot;: [ # Identification of a Spanner source used in the Dataflow job.
459 { # Metadata for a Spanner connector used by the job.
460 &quot;databaseId&quot;: &quot;A String&quot;, # DatabaseId accessed in the connection.
461 &quot;instanceId&quot;: &quot;A String&quot;, # InstanceId accessed in the connection.
462 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
463 },
464 ],
465 &quot;datastoreDetails&quot;: [ # Identification of a Datastore source used in the Dataflow job.
466 { # Metadata for a Datastore connector used by the job.
467 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
468 &quot;namespace&quot;: &quot;A String&quot;, # Namespace used in the connection.
469 },
470 ],
471 &quot;sdkVersion&quot;: { # The version of the SDK used to run the job. # The SDK version used to run the job.
472 &quot;versionDisplayName&quot;: &quot;A String&quot;, # A readable string describing the version of the SDK.
473 &quot;sdkSupportStatus&quot;: &quot;A String&quot;, # The support status for this SDK version.
474 &quot;version&quot;: &quot;A String&quot;, # The version of the SDK used to run the job.
475 },
476 &quot;bigqueryDetails&quot;: [ # Identification of a BigQuery source used in the Dataflow job.
477 { # Metadata for a BigQuery connector used by the job.
478 &quot;table&quot;: &quot;A String&quot;, # Table accessed in the connection.
479 &quot;dataset&quot;: &quot;A String&quot;, # Dataset accessed in the connection.
480 &quot;projectId&quot;: &quot;A String&quot;, # Project accessed in the connection.
481 &quot;query&quot;: &quot;A String&quot;, # Query used to access data in the connection.
482 },
483 ],
484 &quot;fileDetails&quot;: [ # Identification of a File source used in the Dataflow job.
485 { # Metadata for a File connector used by the job.
486 &quot;filePattern&quot;: &quot;A String&quot;, # File Pattern used to access files by the connector.
487 },
488 ],
489 &quot;pubsubDetails&quot;: [ # Identification of a PubSub source used in the Dataflow job.
490 { # Metadata for a PubSub connector used by the job.
491 &quot;subscription&quot;: &quot;A String&quot;, # Subscription used in the connection.
492 &quot;topic&quot;: &quot;A String&quot;, # Topic accessed in the connection.
493 },
494 ],
495 },
496 &quot;createdFromSnapshotId&quot;: &quot;A String&quot;, # If this is specified, the job&#x27;s initial state is populated from the given
497 # snapshot.
498 &quot;projectId&quot;: &quot;A String&quot;, # The ID of the Cloud Platform project that the job belongs to.
499 &quot;type&quot;: &quot;A String&quot;, # The type of Cloud Dataflow job.
500 &quot;pipelineDescription&quot;: { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
501 # A description of the user pipeline and stages through which it is executed.
502 # Created by Cloud Dataflow service. Only retrieved with
503 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
504 # form. This data is provided by the Dataflow service for ease of visualizing
505 # the pipeline and interpreting Dataflow provided metrics.
506 &quot;executionPipelineStage&quot;: [ # Description of each stage of execution of the pipeline.
507 { # Description of the composing transforms, names/ids, and input/outputs of a
508 # stage of execution. Some composing transforms and sources may have been
509 # generated by the Dataflow service during execution planning.
510 &quot;id&quot;: &quot;A String&quot;, # Dataflow service generated id for this stage.
511 &quot;componentTransform&quot;: [ # Transforms that comprise this execution stage.
512 { # Description of a transform executed as part of an execution stage.
513 &quot;originalTransform&quot;: &quot;A String&quot;, # User name for the original user transform with which this transform is
514 # most closely associated.
515 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this transform.
516 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this transform; may be user or system generated.
517 },
518 ],
519 &quot;componentSource&quot;: [ # Collections produced and consumed by component transforms of this stage.
520 { # Description of an interstitial value between transforms in an execution
521 # stage.
522 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
523 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
524 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
525 # source is most closely associated.
526 },
527 ],
528 &quot;kind&quot;: &quot;A String&quot;, # Type of transform this stage is executing.
529 &quot;outputSource&quot;: [ # Output sources for this stage.
530 { # Description of an input or output of an execution stage.
531 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
532 # source is most closely associated.
533 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
534 &quot;sizeBytes&quot;: &quot;A String&quot;, # Size of the source, if measurable.
535 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
536 },
537 ],
538 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this stage.
539 &quot;inputSource&quot;: [ # Input sources for this stage.
540 { # Description of an input or output of an execution stage.
541 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
542 # source is most closely associated.
543 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
544 &quot;sizeBytes&quot;: &quot;A String&quot;, # Size of the source, if measurable.
545 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
546 },
547 ],
548 },
549 ],
550 &quot;originalPipelineTransform&quot;: [ # Description of each transform in the pipeline and collections between them.
551 { # Description of the type, names/ids, and input/outputs for a transform.
552 &quot;kind&quot;: &quot;A String&quot;, # Type of transform.
553 &quot;inputCollectionName&quot;: [ # User names for all collection inputs to this transform.
554 &quot;A String&quot;,
555 ],
556 &quot;name&quot;: &quot;A String&quot;, # User provided name for this transform instance.
557 &quot;id&quot;: &quot;A String&quot;, # SDK generated id of this transform instance.
558 &quot;displayData&quot;: [ # Transform-specific display data.
559 { # Data provided with a pipeline or transform to provide descriptive info.
560 &quot;timestampValue&quot;: &quot;A String&quot;, # Contains value if the data is of timestamp type.
561 &quot;boolValue&quot;: True or False, # Contains value if the data is of a boolean type.
562 &quot;javaClassValue&quot;: &quot;A String&quot;, # Contains value if the data is of java class type.
563 &quot;strValue&quot;: &quot;A String&quot;, # Contains value if the data is of string type.
564 &quot;int64Value&quot;: &quot;A String&quot;, # Contains value if the data is of int64 type.
565 &quot;durationValue&quot;: &quot;A String&quot;, # Contains value if the data is of duration type.
566 &quot;namespace&quot;: &quot;A String&quot;, # The namespace for the key. This is usually a class name or programming
567 # language namespace (i.e. python module) which defines the display data.
568 # This allows a dax monitoring system to specially handle the data
569 # and perform custom rendering.
570 &quot;floatValue&quot;: 3.14, # Contains value if the data is of float type.
571 &quot;key&quot;: &quot;A String&quot;, # The key identifying the display data.
572 # This is intended to be used as a label for the display data
573 # when viewed in a dax monitoring system.
574 &quot;shortStrValue&quot;: &quot;A String&quot;, # A possible additional shorter value to display.
575 # For example a java_class_name_value of com.mypackage.MyDoFn
576 # will be stored with MyDoFn as the short_str_value and
577 # com.mypackage.MyDoFn as the java_class_name value.
578 # short_str_value can be displayed and java_class_name_value
579 # will be displayed as a tooltip.
580 &quot;url&quot;: &quot;A String&quot;, # An optional full URL.
581 &quot;label&quot;: &quot;A String&quot;, # An optional label to display in a dax UI for the element.
582 },
583 ],
584 &quot;outputCollectionName&quot;: [ # User names for all collection outputs to this transform.
585 &quot;A String&quot;,
586 ],
587 },
588 ],
589 &quot;displayData&quot;: [ # Pipeline level display data.
590 { # Data provided with a pipeline or transform to provide descriptive info.
591 &quot;timestampValue&quot;: &quot;A String&quot;, # Contains value if the data is of timestamp type.
592 &quot;boolValue&quot;: True or False, # Contains value if the data is of a boolean type.
593 &quot;javaClassValue&quot;: &quot;A String&quot;, # Contains value if the data is of java class type.
594 &quot;strValue&quot;: &quot;A String&quot;, # Contains value if the data is of string type.
595 &quot;int64Value&quot;: &quot;A String&quot;, # Contains value if the data is of int64 type.
596 &quot;durationValue&quot;: &quot;A String&quot;, # Contains value if the data is of duration type.
597 &quot;namespace&quot;: &quot;A String&quot;, # The namespace for the key. This is usually a class name or programming
598 # language namespace (i.e. python module) which defines the display data.
599 # This allows a dax monitoring system to specially handle the data
600 # and perform custom rendering.
601 &quot;floatValue&quot;: 3.14, # Contains value if the data is of float type.
602 &quot;key&quot;: &quot;A String&quot;, # The key identifying the display data.
603 # This is intended to be used as a label for the display data
604 # when viewed in a dax monitoring system.
605 &quot;shortStrValue&quot;: &quot;A String&quot;, # A possible additional shorter value to display.
606 # For example a java_class_name_value of com.mypackage.MyDoFn
607 # will be stored with MyDoFn as the short_str_value and
608 # com.mypackage.MyDoFn as the java_class_name value.
609 # short_str_value can be displayed and java_class_name_value
610 # will be displayed as a tooltip.
611 &quot;url&quot;: &quot;A String&quot;, # An optional full URL.
612 &quot;label&quot;: &quot;A String&quot;, # An optional label to display in a dax UI for the element.
613 },
614 ],
615 },
616 &quot;replaceJobId&quot;: &quot;A String&quot;, # If this job is an update of an existing job, this field is the job ID
617 # of the job it replaced.
618 #
619 # When sending a `CreateJobRequest`, you can update a job by specifying it
620 # here. The job named here is stopped, and its intermediate state is
621 # transferred to this job.
622 &quot;tempFiles&quot;: [ # A set of files the system should be aware of that are used
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700623 # for temporary storage. These temporary files will be
624 # removed on job completion.
625 # No duplicates are allowed.
626 # No file patterns are supported.
627 #
628 # The supported files are:
629 #
630 # Google Cloud Storage:
631 #
632 # storage.googleapis.com/{bucket}/{object}
633 # bucket.storage.googleapis.com/{object}
Bu Sun Kim65020912020-05-20 12:08:20 -0700634 &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700635 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700636 &quot;name&quot;: &quot;A String&quot;, # The user-specified Cloud Dataflow job name.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700637 #
638 # Only one Job with a given name may exist in a project at any
639 # given time. If a caller attempts to create a Job with the same
640 # name as an already-existing Job, the attempt returns the
641 # existing Job.
642 #
643 # The name must match the regular expression
644 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
Bu Sun Kim65020912020-05-20 12:08:20 -0700645 &quot;steps&quot;: [ # Exactly one of step or steps_location should be specified.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700646 #
647 # The top-level steps that constitute the entire job.
648 { # Defines a particular step within a Cloud Dataflow job.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400649 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700650 # A job consists of multiple steps, each of which performs some
651 # specific operation as part of the overall job. Data is typically
652 # passed from one step to another as part of the job.
653 #
Bu Sun Kim65020912020-05-20 12:08:20 -0700654 # Here&#x27;s an example of a sequence of steps which together implement a
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700655 # Map-Reduce job:
656 #
657 # * Read a collection of data from some source, parsing the
Bu Sun Kim65020912020-05-20 12:08:20 -0700658 # collection&#x27;s elements.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700659 #
660 # * Validate the elements.
661 #
662 # * Apply a user-defined function to map each element to some value
663 # and extract an element-specific key value.
664 #
665 # * Group elements with the same key into a single element with
666 # that key, transforming a multiply-keyed collection into a
667 # uniquely-keyed collection.
668 #
669 # * Write the elements out to some data sink.
670 #
671 # Note that the Cloud Dataflow service may be used to run many different
672 # types of jobs, not just Map-Reduce.
Bu Sun Kim65020912020-05-20 12:08:20 -0700673 &quot;name&quot;: &quot;A String&quot;, # The name that identifies the step. This must be unique for each
Dan O'Mearadd494642020-05-01 07:42:23 -0700674 # step with respect to all other steps in the Cloud Dataflow job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700675 &quot;kind&quot;: &quot;A String&quot;, # The kind of step in the Cloud Dataflow job.
676 &quot;properties&quot;: { # Named properties associated with the step. Each kind of
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700677 # predefined step has its own required set of properties.
678 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Bu Sun Kim65020912020-05-20 12:08:20 -0700679 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700680 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700681 },
682 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700683 &quot;replacedByJobId&quot;: &quot;A String&quot;, # If another job is an update of this job (and thus, this job is in
684 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
685 &quot;executionInfo&quot;: { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
686 # isn&#x27;t contained in the submitted job.
687 &quot;stages&quot;: { # A mapping from each stage to the information about that stage.
688 &quot;a_key&quot;: { # Contains information about how a particular
689 # google.dataflow.v1beta3.Step will be executed.
690 &quot;stepName&quot;: [ # The steps associated with the execution stage.
691 # Note that stages may have several steps, and that a given step
692 # might be run by more than one stage.
693 &quot;A String&quot;,
694 ],
695 },
696 },
697 },
698 &quot;currentState&quot;: &quot;A String&quot;, # The current state of the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700699 #
700 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
701 # specified.
702 #
703 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
704 # terminal state. After a job has reached a terminal state, no
705 # further state updates may be made.
706 #
707 # This field may be mutated by the Cloud Dataflow service;
708 # callers cannot mutate it.
Bu Sun Kim65020912020-05-20 12:08:20 -0700709 &quot;location&quot;: &quot;A String&quot;, # The [regional endpoint]
710 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
711 # contains this job.
712 &quot;startTime&quot;: &quot;A String&quot;, # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
713 # Flexible resource scheduling jobs are started with some delay after job
714 # creation, so start_time is unset before start and is updated when the
715 # job is started by the Cloud Dataflow service. For other jobs, start_time
716 # is always equal to create_time and is immutable and set by the Cloud Dataflow
717 # service.
718 &quot;stepsLocation&quot;: &quot;A String&quot;, # The GCS location where the steps are stored.
719 &quot;labels&quot;: { # User-defined labels for this job.
720 #
721 # The labels map can contain no more than 64 entries. Entries of the labels
722 # map are UTF8 strings that comply with the following restrictions:
723 #
724 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
725 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
726 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
727 # size.
728 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700729 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700730 &quot;createTime&quot;: &quot;A String&quot;, # The timestamp when the job was initially created. Immutable and set by the
731 # Cloud Dataflow service.
732 &quot;requestedState&quot;: &quot;A String&quot;, # The job&#x27;s requested state.
733 #
734 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
735 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
736 # also be used to directly set a job&#x27;s requested state to
737 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
738 # job if it has not already reached a terminal state.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700739 }</pre>
740</div>
741
742<div class="method">
Bu Sun Kim65020912020-05-20 12:08:20 -0700743 <code class="details" id="get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</code>
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700744 <pre>Get the template associated with a template.
745
746Args:
747 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Bu Sun Kim65020912020-05-20 12:08:20 -0700748 view: string, The view to retrieve. Defaults to METADATA_ONLY.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700749 gcsPath: string, Required. A Cloud Storage path to the template from which to
750create the job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700751Must be a valid Cloud Storage URL, beginning with &#x27;gs://&#x27;.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700752 location: string, The [regional endpoint]
753(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
754which to direct the request.
755 x__xgafv: string, V1 error format.
756 Allowed values
757 1 - v1 error format
758 2 - v2 error format
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700759
760Returns:
761 An object of the form:
762
763 { # The response to a GetTemplate request.
Bu Sun Kim65020912020-05-20 12:08:20 -0700764 &quot;status&quot;: { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700765 # request will be indicated in the error_details.
766 # different programming environments, including REST APIs and RPC APIs. It is
Dan O'Mearadd494642020-05-01 07:42:23 -0700767 # used by [gRPC](https://github.com/grpc). Each `Status` message contains
768 # three pieces of data: error code, error message, and error details.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700769 #
Dan O'Mearadd494642020-05-01 07:42:23 -0700770 # You can find out more about this error model and how to work with it in the
771 # [API Design Guide](https://cloud.google.com/apis/design/errors).
Bu Sun Kim65020912020-05-20 12:08:20 -0700772 &quot;message&quot;: &quot;A String&quot;, # A developer-facing error message, which should be in English. Any
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700773 # user-facing error message should be localized and sent in the
774 # google.rpc.Status.details field, or localized by the client.
Bu Sun Kim65020912020-05-20 12:08:20 -0700775 &quot;details&quot;: [ # A list of messages that carry the error details. There is a common set of
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700776 # message types for APIs to use.
777 {
Bu Sun Kim65020912020-05-20 12:08:20 -0700778 &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700779 },
780 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700781 &quot;code&quot;: 42, # The status code, which should be an enum value of google.rpc.Code.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700782 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700783 &quot;templateType&quot;: &quot;A String&quot;, # Template Type.
784 &quot;metadata&quot;: { # Metadata describing a template. # The template metadata describing the template name, available
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700785 # parameters, etc.
Bu Sun Kim65020912020-05-20 12:08:20 -0700786 &quot;name&quot;: &quot;A String&quot;, # Required. The name of the template.
787 &quot;parameters&quot;: [ # The parameters for the template.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700788 { # Metadata for a specific parameter.
Bu Sun Kim65020912020-05-20 12:08:20 -0700789 &quot;label&quot;: &quot;A String&quot;, # Required. The label to display for the parameter.
790 &quot;paramType&quot;: &quot;A String&quot;, # Optional. The type of the parameter.
Dan O'Mearadd494642020-05-01 07:42:23 -0700791 # Used for selecting input picker.
Bu Sun Kim65020912020-05-20 12:08:20 -0700792 &quot;helpText&quot;: &quot;A String&quot;, # Required. The help text to display for the parameter.
793 &quot;name&quot;: &quot;A String&quot;, # Required. The name of the parameter.
794 &quot;regexes&quot;: [ # Optional. Regexes that the parameter must match.
795 &quot;A String&quot;,
796 ],
797 &quot;isOptional&quot;: True or False, # Optional. Whether the parameter is optional. Defaults to false.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700798 },
799 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700800 &quot;description&quot;: &quot;A String&quot;, # Optional. A description of the template.
801 },
802 &quot;runtimeMetadata&quot;: { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters.
803 &quot;sdkInfo&quot;: { # SDK Information. # SDK Info for the template.
804 &quot;language&quot;: &quot;A String&quot;, # Required. The SDK Language.
805 &quot;version&quot;: &quot;A String&quot;, # Optional. The SDK version.
806 },
807 &quot;parameters&quot;: [ # The parameters for the template.
808 { # Metadata for a specific parameter.
809 &quot;label&quot;: &quot;A String&quot;, # Required. The label to display for the parameter.
810 &quot;paramType&quot;: &quot;A String&quot;, # Optional. The type of the parameter.
811 # Used for selecting input picker.
812 &quot;helpText&quot;: &quot;A String&quot;, # Required. The help text to display for the parameter.
813 &quot;name&quot;: &quot;A String&quot;, # Required. The name of the parameter.
814 &quot;regexes&quot;: [ # Optional. Regexes that the parameter must match.
815 &quot;A String&quot;,
816 ],
817 &quot;isOptional&quot;: True or False, # Optional. Whether the parameter is optional. Defaults to false.
818 },
819 ],
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700820 },
821 }</pre>
822</div>
823
824<div class="method">
Bu Sun Kim65020912020-05-20 12:08:20 -0700825 <code class="details" id="launch">launch(projectId, body=None, validateOnly=None, gcsPath=None, location=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, x__xgafv=None)</code>
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700826 <pre>Launch a template.
827
828Args:
829 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Dan O'Mearadd494642020-05-01 07:42:23 -0700830 body: object, The request body.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700831 The object takes the form of:
832
833{ # Parameters to provide to the template being launched.
Bu Sun Kim65020912020-05-20 12:08:20 -0700834 &quot;transformNameMapping&quot;: { # Only applicable when updating a pipeline. Map of transform name prefixes of
835 # the job to be replaced to the corresponding name prefixes of the new job.
836 &quot;a_key&quot;: &quot;A String&quot;,
837 },
838 &quot;environment&quot;: { # The environment values to set at runtime. # The runtime environment for the job.
839 &quot;workerRegion&quot;: &quot;A String&quot;, # The Compute Engine region
Dan O'Mearadd494642020-05-01 07:42:23 -0700840 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
Bu Sun Kim65020912020-05-20 12:08:20 -0700841 # which worker processing should occur, e.g. &quot;us-west1&quot;. Mutually exclusive
Dan O'Mearadd494642020-05-01 07:42:23 -0700842 # with worker_zone. If neither worker_region nor worker_zone is specified,
Bu Sun Kim65020912020-05-20 12:08:20 -0700843 # default to the control plane&#x27;s region.
844 &quot;numWorkers&quot;: 42, # The initial number of Google Compute Engine instnaces for the job.
845 &quot;zone&quot;: &quot;A String&quot;, # The Compute Engine [availability
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700846 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
847 # for launching worker instances to run your pipeline.
Dan O'Mearadd494642020-05-01 07:42:23 -0700848 # In the future, worker_zone will take precedence.
Bu Sun Kim65020912020-05-20 12:08:20 -0700849 &quot;workerZone&quot;: &quot;A String&quot;, # The Compute Engine zone
850 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
851 # which worker processing should occur, e.g. &quot;us-west1-a&quot;. Mutually exclusive
852 # with worker_region. If neither worker_region nor worker_zone is specified,
853 # a zone in the control plane&#x27;s region is chosen based on available capacity.
854 # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
855 &quot;additionalUserLabels&quot;: { # Additional user labels to be specified for the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700856 # Keys and values should follow the restrictions specified in the [labeling
857 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
858 # page.
Bu Sun Kim65020912020-05-20 12:08:20 -0700859 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700860 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700861 &quot;additionalExperiments&quot;: [ # Additional experiment flags for the job.
862 &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700863 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700864 &quot;maxWorkers&quot;: 42, # The maximum number of Google Compute Engine instances to be made
865 # available to your pipeline during execution, from 1 to 1000.
866 &quot;serviceAccountEmail&quot;: &quot;A String&quot;, # The email address of the service account to run the job as.
867 &quot;machineType&quot;: &quot;A String&quot;, # The machine type to use for the job. Defaults to the value from the
868 # template if not specified.
869 &quot;subnetwork&quot;: &quot;A String&quot;, # Subnetwork to which VMs will be assigned, if desired. Expected to be of
870 # the form &quot;regions/REGION/subnetworks/SUBNETWORK&quot;.
871 &quot;ipConfiguration&quot;: &quot;A String&quot;, # Configuration for VM IPs.
872 &quot;kmsKeyName&quot;: &quot;A String&quot;, # Optional. Name for the Cloud KMS key for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700873 # Key format is:
874 # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
Bu Sun Kim65020912020-05-20 12:08:20 -0700875 &quot;bypassTempDirValidation&quot;: True or False, # Whether to bypass the safety checks for the job&#x27;s temporary directory.
876 # Use with caution.
877 &quot;tempLocation&quot;: &quot;A String&quot;, # The Cloud Storage path to use for temporary files.
878 # Must be a valid Cloud Storage URL, beginning with `gs://`.
879 &quot;network&quot;: &quot;A String&quot;, # Network to which VMs will be assigned. If empty or unspecified,
880 # the service will use the network &quot;default&quot;.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700881 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700882 &quot;update&quot;: True or False, # If set, replace the existing pipeline with the name specified by jobName
Dan O'Mearadd494642020-05-01 07:42:23 -0700883 # with this pipeline, preserving state.
Bu Sun Kim65020912020-05-20 12:08:20 -0700884 &quot;parameters&quot;: { # The runtime parameters to pass to the job.
885 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700886 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700887 &quot;jobName&quot;: &quot;A String&quot;, # Required. The job name to use for the created job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700888 }
889
Bu Sun Kim65020912020-05-20 12:08:20 -0700890 validateOnly: boolean, If true, the request is validated but not actually executed.
891Defaults to false.
892 gcsPath: string, A Cloud Storage path to the template from which to create
893the job.
894Must be a valid Cloud Storage URL, beginning with &#x27;gs://&#x27;.
895 location: string, The [regional endpoint]
896(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
897which to direct the request.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700898 dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
899The file must be a JSON-serialized DynamicTemplateFileSpec object.
Bu Sun Kim65020912020-05-20 12:08:20 -0700900 dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
901Must be a valid Cloud Storage URL, beginning with `gs://`.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700902 x__xgafv: string, V1 error format.
903 Allowed values
904 1 - v1 error format
905 2 - v2 error format
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700906
907Returns:
908 An object of the form:
909
910 { # Response to the request to launch a template.
Bu Sun Kim65020912020-05-20 12:08:20 -0700911 &quot;job&quot;: { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700912 # the job was successfully launched.
Bu Sun Kim65020912020-05-20 12:08:20 -0700913 &quot;clientRequestId&quot;: &quot;A String&quot;, # The client&#x27;s unique identifier of the job, re-used across retried attempts.
914 # If this field is set, the service will ensure its uniqueness.
915 # The request to create a job will fail if the service has knowledge of a
916 # previously submitted job with the same client&#x27;s ID and job name.
917 # The caller may use this field to ensure idempotence of job
918 # creation across retried attempts to create a job.
919 # By default, the field is empty and, in that case, the service ignores it.
920 &quot;id&quot;: &quot;A String&quot;, # The unique ID of this job.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400921 #
922 # This field is set by the Cloud Dataflow service when the Job is
923 # created, and is immutable for the life of the job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700924 &quot;currentStateTime&quot;: &quot;A String&quot;, # The timestamp associated with the current state.
925 &quot;transformNameMapping&quot;: { # The map of transform name prefixes of the job to be replaced to the
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700926 # corresponding name prefixes of the new job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700927 &quot;a_key&quot;: &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700928 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700929 &quot;environment&quot;: { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
930 &quot;sdkPipelineOptions&quot;: { # The Cloud Dataflow SDK pipeline options specified by the user. These
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400931 # options are passed through the service and are used to recreate the
932 # SDK pipeline options on the worker in a language agnostic and platform
933 # independent way.
Bu Sun Kim65020912020-05-20 12:08:20 -0700934 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700935 },
Bu Sun Kim65020912020-05-20 12:08:20 -0700936 &quot;flexResourceSchedulingGoal&quot;: &quot;A String&quot;, # Which Flexible Resource Scheduling mode to run in.
937 &quot;workerPools&quot;: [ # The worker pools. At least one &quot;harness&quot; worker pool must be
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400938 # specified in order for the job to have workers.
939 { # Describes one particular pool of Cloud Dataflow workers to be
940 # instantiated by the Cloud Dataflow service in order to perform the
941 # computations required by a job. Note that a workflow job may use
942 # multiple pools, in order to match the various computational
943 # requirements of the various stages of the job.
Bu Sun Kim65020912020-05-20 12:08:20 -0700944 &quot;defaultPackageSet&quot;: &quot;A String&quot;, # The default package set to install. This allows the service to
945 # select a default set of packages which are useful to worker
946 # harnesses written in a particular language.
947 &quot;network&quot;: &quot;A String&quot;, # Network to which VMs will be assigned. If empty or unspecified,
948 # the service will use the network &quot;default&quot;.
949 &quot;zone&quot;: &quot;A String&quot;, # Zone to run the worker pools in. If empty or unspecified, the service
Dan O'Mearadd494642020-05-01 07:42:23 -0700950 # will attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700951 &quot;numWorkers&quot;: 42, # Number of Google Compute Engine workers in this pool needed to
952 # execute the job. If zero or unspecified, the service will
953 # attempt to choose a reasonable default.
954 &quot;numThreadsPerWorker&quot;: 42, # The number of threads per worker harness. If empty or unspecified, the
Dan O'Mearadd494642020-05-01 07:42:23 -0700955 # service will choose a number of threads (according to the number of cores
956 # on the selected machine type for batch, or 1 by convention for streaming).
Bu Sun Kim65020912020-05-20 12:08:20 -0700957 &quot;diskSourceImage&quot;: &quot;A String&quot;, # Fully qualified source image for disks.
958 &quot;packages&quot;: [ # Packages to be installed on workers.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700959 { # The packages that must be installed in order for a worker to run the
960 # steps of the Cloud Dataflow job that will be assigned to its worker
961 # pool.
962 #
963 # This is the mechanism by which the Cloud Dataflow SDK causes code to
964 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
Bu Sun Kim65020912020-05-20 12:08:20 -0700965 # might use this to install jars containing the user&#x27;s code and all of the
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700966 # various dependencies (libraries, data files, etc.) required in order
967 # for that code to run.
Bu Sun Kim65020912020-05-20 12:08:20 -0700968 &quot;location&quot;: &quot;A String&quot;, # The resource to read the package from. The supported resource type is:
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700969 #
970 # Google Cloud Storage:
971 #
972 # storage.googleapis.com/{bucket}
973 # bucket.storage.googleapis.com/
Bu Sun Kim65020912020-05-20 12:08:20 -0700974 &quot;name&quot;: &quot;A String&quot;, # The name of the package.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700975 },
976 ],
Bu Sun Kim65020912020-05-20 12:08:20 -0700977 &quot;teardownPolicy&quot;: &quot;A String&quot;, # Sets the policy for determining when to turn down the worker pool.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400978 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
979 # `TEARDOWN_NEVER`.
980 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
981 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
982 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
983 # down.
984 #
985 # If the workers are not torn down by the service, they will
986 # continue to run and use Google Compute Engine VM resources in the
Bu Sun Kim65020912020-05-20 12:08:20 -0700987 # user&#x27;s project until they are explicitly terminated by the user.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400988 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
989 # policy except for small, manually supervised test jobs.
990 #
991 # If unknown or unspecified, the service will attempt to choose a reasonable
992 # default.
Bu Sun Kim65020912020-05-20 12:08:20 -0700993 &quot;onHostMaintenance&quot;: &quot;A String&quot;, # The action to take on host maintenance, as defined by the Google
994 # Compute Engine API.
995 &quot;poolArgs&quot;: { # Extra arguments for this worker pool.
996 &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
997 },
998 &quot;diskSizeGb&quot;: 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
Dan O'Mearadd494642020-05-01 07:42:23 -0700999 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -07001000 &quot;workerHarnessContainerImage&quot;: &quot;A String&quot;, # Required. Docker container image that executes the Cloud Dataflow worker
1001 # harness, residing in Google Container Registry.
1002 #
1003 # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
1004 &quot;diskType&quot;: &quot;A String&quot;, # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001005 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -07001006 &quot;machineType&quot;: &quot;A String&quot;, # Machine type (e.g. &quot;n1-standard-1&quot;). If empty or unspecified, the
1007 # service will attempt to choose a reasonable default.
1008 &quot;kind&quot;: &quot;A String&quot;, # The kind of the worker pool; currently only `harness` and `shuffle`
1009 # are supported.
1010 &quot;dataDisks&quot;: [ # Data disks that are used by a VM in this workflow.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001011 { # Describes the data disk used by a workflow job.
Bu Sun Kim65020912020-05-20 12:08:20 -07001012 &quot;sizeGb&quot;: 42, # Size of disk in GB. If zero or unspecified, the service will
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001013 # attempt to choose a reasonable default.
Bu Sun Kim65020912020-05-20 12:08:20 -07001014 &quot;diskType&quot;: &quot;A String&quot;, # Disk storage type, as defined by Google Compute Engine. This
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001015 # must be a disk type appropriate to the project and zone in which
1016 # the workers will run. If unknown or unspecified, the service
1017 # will attempt to choose a reasonable default.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001018 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001019 # For example, the standard persistent disk type is a resource name
Bu Sun Kim65020912020-05-20 12:08:20 -07001020 # typically ending in &quot;pd-standard&quot;. If SSD persistent disks are
1021 # available, the resource name typically ends with &quot;pd-ssd&quot;. The
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001022 # actual valid values are defined by the Google Compute Engine API,
1023 # not by the Cloud Dataflow API; consult the Google Compute Engine
1024 # documentation for more information about determining the set of
1025 # available disk types for a particular project and zone.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001026 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001027 # Google Compute Engine Disk types are local to a particular
1028 # project in a particular zone, and so the resource name will
1029 # typically look something like this:
1030 #
1031 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Bu Sun Kim65020912020-05-20 12:08:20 -07001032 &quot;mountPoint&quot;: &quot;A String&quot;, # Directory in a VM where disk is mounted.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001033 },
1034 ],
Bu Sun Kim65020912020-05-20 12:08:20 -07001035 &quot;sdkHarnessContainerImages&quot;: [ # Set of SDK harness containers needed to execute this pipeline. This will
Dan O'Mearadd494642020-05-01 07:42:23 -07001036 # only be set in the Fn API path. For non-cross-language pipelines this
1037 # should have only one entry. Cross-language pipelines will have two or more
1038 # entries.
1039 { # Defines an SDK harness container for executing Dataflow pipelines.
Bu Sun Kim65020912020-05-20 12:08:20 -07001040 &quot;containerImage&quot;: &quot;A String&quot;, # A docker container image that resides in Google Container Registry.
1041 &quot;useSingleCorePerContainer&quot;: True or False, # If true, recommends the Dataflow service to use only one core per SDK
Dan O'Mearadd494642020-05-01 07:42:23 -07001042 # container instance with this image. If false (or unset) recommends using
1043 # more than one core per SDK container instance with this image for
1044 # efficiency. Note that Dataflow service may choose to override this property
1045 # if needed.
1046 },
1047 ],
Bu Sun Kim65020912020-05-20 12:08:20 -07001048 &quot;subnetwork&quot;: &quot;A String&quot;, # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1049 # the form &quot;regions/REGION/subnetworks/SUBNETWORK&quot;.
1050 &quot;ipConfiguration&quot;: &quot;A String&quot;, # Configuration for VM IPs.
1051 &quot;taskrunnerSettings&quot;: { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1052 # using the standard Dataflow task runner. Users should ignore
1053 # this field.
1054 &quot;alsologtostderr&quot;: True or False, # Whether to also send taskrunner log info to stderr.
1055 &quot;taskGroup&quot;: &quot;A String&quot;, # The UNIX group ID on the worker VM to use for tasks launched by
1056 # taskrunner; e.g. &quot;wheel&quot;.
1057 &quot;harnessCommand&quot;: &quot;A String&quot;, # The command to launch the worker harness.
1058 &quot;logDir&quot;: &quot;A String&quot;, # The directory on the VM to store logs.
1059 &quot;oauthScopes&quot;: [ # The OAuth2 scopes to be requested by the taskrunner in order to
1060 # access the Cloud Dataflow API.
1061 &quot;A String&quot;,
1062 ],
1063 &quot;dataflowApiVersion&quot;: &quot;A String&quot;, # The API version of endpoint, e.g. &quot;v1b3&quot;
1064 &quot;logUploadLocation&quot;: &quot;A String&quot;, # Indicates where to put logs. If this is not specified, the logs
1065 # will not be uploaded.
1066 #
1067 # The supported resource type is:
1068 #
1069 # Google Cloud Storage:
1070 # storage.googleapis.com/{bucket}/{object}
1071 # bucket.storage.googleapis.com/{object}
1072 &quot;streamingWorkerMainClass&quot;: &quot;A String&quot;, # The streaming worker main class name.
1073 &quot;workflowFileName&quot;: &quot;A String&quot;, # The file to store the workflow in.
1074 &quot;baseTaskDir&quot;: &quot;A String&quot;, # The location on the worker for task-specific subdirectories.
1075 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the taskrunner should use for
1076 # temporary storage.
1077 #
1078 # The supported resource type is:
1079 #
1080 # Google Cloud Storage:
1081 # storage.googleapis.com/{bucket}/{object}
1082 # bucket.storage.googleapis.com/{object}
1083 &quot;commandlinesFileName&quot;: &quot;A String&quot;, # The file to store preprocessing commands in.
1084 &quot;languageHint&quot;: &quot;A String&quot;, # The suggested backend language.
1085 &quot;baseUrl&quot;: &quot;A String&quot;, # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1086 #
1087 # When workers access Google Cloud APIs, they logically do so via
1088 # relative URLs. If this field is specified, it supplies the base
1089 # URL to use for resolving these relative URLs. The normative
1090 # algorithm used is defined by RFC 1808, &quot;Relative Uniform Resource
1091 # Locators&quot;.
1092 #
1093 # If not specified, the default value is &quot;http://www.googleapis.com/&quot;
1094 &quot;logToSerialconsole&quot;: True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1095 # console.
1096 &quot;continueOnException&quot;: True or False, # Whether to continue taskrunner if an exception is hit.
1097 &quot;parallelWorkerSettings&quot;: { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1098 &quot;baseUrl&quot;: &quot;A String&quot;, # The base URL for accessing Google Cloud APIs.
1099 #
1100 # When workers access Google Cloud APIs, they logically do so via
1101 # relative URLs. If this field is specified, it supplies the base
1102 # URL to use for resolving these relative URLs. The normative
1103 # algorithm used is defined by RFC 1808, &quot;Relative Uniform Resource
1104 # Locators&quot;.
1105 #
1106 # If not specified, the default value is &quot;http://www.googleapis.com/&quot;
1107 &quot;reportingEnabled&quot;: True or False, # Whether to send work progress updates to the service.
1108 &quot;servicePath&quot;: &quot;A String&quot;, # The Cloud Dataflow service path relative to the root URL, for example,
1109 # &quot;dataflow/v1b3/projects&quot;.
1110 &quot;shuffleServicePath&quot;: &quot;A String&quot;, # The Shuffle service path relative to the root URL, for example,
1111 # &quot;shuffle/v1beta1&quot;.
1112 &quot;workerId&quot;: &quot;A String&quot;, # The ID of the worker running this pipeline.
1113 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the system should use for temporary
1114 # storage.
1115 #
1116 # The supported resource type is:
1117 #
1118 # Google Cloud Storage:
1119 #
1120 # storage.googleapis.com/{bucket}/{object}
1121 # bucket.storage.googleapis.com/{object}
1122 },
1123 &quot;vmId&quot;: &quot;A String&quot;, # The ID string of the VM.
1124 &quot;taskUser&quot;: &quot;A String&quot;, # The UNIX user ID on the worker VM to use for tasks launched by
1125 # taskrunner; e.g. &quot;root&quot;.
1126 },
1127 &quot;autoscalingSettings&quot;: { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1128 &quot;maxNumWorkers&quot;: 42, # The maximum number of workers to cap scaling at.
1129 &quot;algorithm&quot;: &quot;A String&quot;, # The algorithm to use for autoscaling.
1130 },
1131 &quot;metadata&quot;: { # Metadata to set on the Google Compute Engine VMs.
1132 &quot;a_key&quot;: &quot;A String&quot;,
1133 },
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001134 },
1135 ],
Bu Sun Kim65020912020-05-20 12:08:20 -07001136 &quot;dataset&quot;: &quot;A String&quot;, # The dataset for the current project where various workflow
1137 # related tables are stored.
1138 #
1139 # The supported resource type is:
1140 #
1141 # Google BigQuery:
1142 # bigquery.googleapis.com/{dataset}
1143 &quot;internalExperiments&quot;: { # Experimental settings.
1144 &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
1145 },
1146 &quot;workerRegion&quot;: &quot;A String&quot;, # The Compute Engine region
1147 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
1148 # which worker processing should occur, e.g. &quot;us-west1&quot;. Mutually exclusive
1149 # with worker_zone. If neither worker_region nor worker_zone is specified,
1150 # default to the control plane&#x27;s region.
1151 &quot;serviceKmsKeyName&quot;: &quot;A String&quot;, # If set, contains the Cloud KMS key identifier used to encrypt data
1152 # at rest, AKA a Customer Managed Encryption Key (CMEK).
1153 #
1154 # Format:
1155 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1156 &quot;userAgent&quot;: { # A description of the process that generated the request.
1157 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
1158 },
1159 &quot;workerZone&quot;: &quot;A String&quot;, # The Compute Engine zone
1160 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
1161 # which worker processing should occur, e.g. &quot;us-west1-a&quot;. Mutually exclusive
1162 # with worker_region. If neither worker_region nor worker_zone is specified,
1163 # a zone in the control plane&#x27;s region is chosen based on available capacity.
1164 &quot;clusterManagerApiService&quot;: &quot;A String&quot;, # The type of cluster manager API to use. If unknown or
Dan O'Mearadd494642020-05-01 07:42:23 -07001165 # unspecified, the service will attempt to choose a reasonable
1166 # default. This should be in the form of the API service name,
Bu Sun Kim65020912020-05-20 12:08:20 -07001167 # e.g. &quot;compute.googleapis.com&quot;.
1168 &quot;tempStoragePrefix&quot;: &quot;A String&quot;, # The prefix of the resources the system should use for temporary
1169 # storage. The system will append the suffix &quot;/temp-{JOBNAME}&quot; to
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001170 # this resource prefix, where {JOBNAME} is the value of the
1171 # job_name field. The resulting bucket and object prefix is used
1172 # as the prefix of the resources used to store temporary data
1173 # needed during the job execution. NOTE: This will override the
1174 # value in taskrunner_settings.
1175 # The supported resource type is:
1176 #
1177 # Google Cloud Storage:
1178 #
1179 # storage.googleapis.com/{bucket}/{object}
1180 # bucket.storage.googleapis.com/{object}
Bu Sun Kim65020912020-05-20 12:08:20 -07001181 &quot;experiments&quot;: [ # The list of experiments to enable.
1182 &quot;A String&quot;,
1183 ],
1184 &quot;version&quot;: { # A structure describing which components and their versions of the service
1185 # are required in order to run the job.
1186 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
1187 },
1188 &quot;serviceAccountEmail&quot;: &quot;A String&quot;, # Identity to run virtual machines as. Defaults to the default account.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001189 },
Bu Sun Kim65020912020-05-20 12:08:20 -07001190 &quot;stageStates&quot;: [ # This field may be mutated by the Cloud Dataflow service;
1191 # callers cannot mutate it.
1192 { # A message describing the state of a particular execution stage.
1193 &quot;executionStageName&quot;: &quot;A String&quot;, # The name of the execution stage.
1194 &quot;currentStateTime&quot;: &quot;A String&quot;, # The time at which the stage transitioned to this state.
1195 &quot;executionStageState&quot;: &quot;A String&quot;, # Execution stage states allow the same set of values as JobState.
1196 },
1197 ],
1198 &quot;jobMetadata&quot;: { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
1199 # by the metadata values provided here. Populated for ListJobs and all GetJob
1200 # views SUMMARY and higher.
1201 # ListJob response and Job SUMMARY view.
1202 &quot;bigTableDetails&quot;: [ # Identification of a BigTable source used in the Dataflow job.
1203 { # Metadata for a BigTable connector used by the job.
1204 &quot;tableId&quot;: &quot;A String&quot;, # TableId accessed in the connection.
1205 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
1206 &quot;instanceId&quot;: &quot;A String&quot;, # InstanceId accessed in the connection.
1207 },
1208 ],
1209 &quot;spannerDetails&quot;: [ # Identification of a Spanner source used in the Dataflow job.
1210 { # Metadata for a Spanner connector used by the job.
1211 &quot;databaseId&quot;: &quot;A String&quot;, # DatabaseId accessed in the connection.
1212 &quot;instanceId&quot;: &quot;A String&quot;, # InstanceId accessed in the connection.
1213 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
1214 },
1215 ],
1216 &quot;datastoreDetails&quot;: [ # Identification of a Datastore source used in the Dataflow job.
1217 { # Metadata for a Datastore connector used by the job.
1218 &quot;projectId&quot;: &quot;A String&quot;, # ProjectId accessed in the connection.
1219 &quot;namespace&quot;: &quot;A String&quot;, # Namespace used in the connection.
1220 },
1221 ],
1222 &quot;sdkVersion&quot;: { # The version of the SDK used to run the job. # The SDK version used to run the job.
1223 &quot;versionDisplayName&quot;: &quot;A String&quot;, # A readable string describing the version of the SDK.
1224 &quot;sdkSupportStatus&quot;: &quot;A String&quot;, # The support status for this SDK version.
1225 &quot;version&quot;: &quot;A String&quot;, # The version of the SDK used to run the job.
1226 },
1227 &quot;bigqueryDetails&quot;: [ # Identification of a BigQuery source used in the Dataflow job.
1228 { # Metadata for a BigQuery connector used by the job.
1229 &quot;table&quot;: &quot;A String&quot;, # Table accessed in the connection.
1230 &quot;dataset&quot;: &quot;A String&quot;, # Dataset accessed in the connection.
1231 &quot;projectId&quot;: &quot;A String&quot;, # Project accessed in the connection.
1232 &quot;query&quot;: &quot;A String&quot;, # Query used to access data in the connection.
1233 },
1234 ],
1235 &quot;fileDetails&quot;: [ # Identification of a File source used in the Dataflow job.
1236 { # Metadata for a File connector used by the job.
1237 &quot;filePattern&quot;: &quot;A String&quot;, # File Pattern used to access files by the connector.
1238 },
1239 ],
1240 &quot;pubsubDetails&quot;: [ # Identification of a PubSub source used in the Dataflow job.
1241 { # Metadata for a PubSub connector used by the job.
1242 &quot;subscription&quot;: &quot;A String&quot;, # Subscription used in the connection.
1243 &quot;topic&quot;: &quot;A String&quot;, # Topic accessed in the connection.
1244 },
1245 ],
1246 },
1247 &quot;createdFromSnapshotId&quot;: &quot;A String&quot;, # If this is specified, the job&#x27;s initial state is populated from the given
1248 # snapshot.
1249 &quot;projectId&quot;: &quot;A String&quot;, # The ID of the Cloud Platform project that the job belongs to.
1250 &quot;type&quot;: &quot;A String&quot;, # The type of Cloud Dataflow job.
1251 &quot;pipelineDescription&quot;: { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1252 # A description of the user pipeline and stages through which it is executed.
1253 # Created by Cloud Dataflow service. Only retrieved with
1254 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1255 # form. This data is provided by the Dataflow service for ease of visualizing
1256 # the pipeline and interpreting Dataflow provided metrics.
1257 &quot;executionPipelineStage&quot;: [ # Description of each stage of execution of the pipeline.
1258 { # Description of the composing transforms, names/ids, and input/outputs of a
1259 # stage of execution. Some composing transforms and sources may have been
1260 # generated by the Dataflow service during execution planning.
1261 &quot;id&quot;: &quot;A String&quot;, # Dataflow service generated id for this stage.
1262 &quot;componentTransform&quot;: [ # Transforms that comprise this execution stage.
1263 { # Description of a transform executed as part of an execution stage.
1264 &quot;originalTransform&quot;: &quot;A String&quot;, # User name for the original user transform with which this transform is
1265 # most closely associated.
1266 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this transform.
1267 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this transform; may be user or system generated.
1268 },
1269 ],
1270 &quot;componentSource&quot;: [ # Collections produced and consumed by component transforms of this stage.
1271 { # Description of an interstitial value between transforms in an execution
1272 # stage.
1273 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
1274 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
1275 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
1276 # source is most closely associated.
1277 },
1278 ],
1279 &quot;kind&quot;: &quot;A String&quot;, # Type of transform this stage is executing.
1280 &quot;outputSource&quot;: [ # Output sources for this stage.
1281 { # Description of an input or output of an execution stage.
1282 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
1283 # source is most closely associated.
1284 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
1285 &quot;sizeBytes&quot;: &quot;A String&quot;, # Size of the source, if measurable.
1286 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
1287 },
1288 ],
1289 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this stage.
1290 &quot;inputSource&quot;: [ # Input sources for this stage.
1291 { # Description of an input or output of an execution stage.
1292 &quot;originalTransformOrCollection&quot;: &quot;A String&quot;, # User name for the original user transform or collection with which this
1293 # source is most closely associated.
1294 &quot;name&quot;: &quot;A String&quot;, # Dataflow service generated name for this source.
1295 &quot;sizeBytes&quot;: &quot;A String&quot;, # Size of the source, if measurable.
1296 &quot;userName&quot;: &quot;A String&quot;, # Human-readable name for this source; may be user or system generated.
1297 },
1298 ],
1299 },
1300 ],
1301 &quot;originalPipelineTransform&quot;: [ # Description of each transform in the pipeline and collections between them.
1302 { # Description of the type, names/ids, and input/outputs for a transform.
1303 &quot;kind&quot;: &quot;A String&quot;, # Type of transform.
1304 &quot;inputCollectionName&quot;: [ # User names for all collection inputs to this transform.
1305 &quot;A String&quot;,
1306 ],
1307 &quot;name&quot;: &quot;A String&quot;, # User provided name for this transform instance.
1308 &quot;id&quot;: &quot;A String&quot;, # SDK generated id of this transform instance.
1309 &quot;displayData&quot;: [ # Transform-specific display data.
1310 { # Data provided with a pipeline or transform to provide descriptive info.
1311 &quot;timestampValue&quot;: &quot;A String&quot;, # Contains value if the data is of timestamp type.
1312 &quot;boolValue&quot;: True or False, # Contains value if the data is of a boolean type.
1313 &quot;javaClassValue&quot;: &quot;A String&quot;, # Contains value if the data is of java class type.
1314 &quot;strValue&quot;: &quot;A String&quot;, # Contains value if the data is of string type.
1315 &quot;int64Value&quot;: &quot;A String&quot;, # Contains value if the data is of int64 type.
1316 &quot;durationValue&quot;: &quot;A String&quot;, # Contains value if the data is of duration type.
1317 &quot;namespace&quot;: &quot;A String&quot;, # The namespace for the key. This is usually a class name or programming
1318 # language namespace (i.e. python module) which defines the display data.
1319 # This allows a dax monitoring system to specially handle the data
1320 # and perform custom rendering.
1321 &quot;floatValue&quot;: 3.14, # Contains value if the data is of float type.
1322 &quot;key&quot;: &quot;A String&quot;, # The key identifying the display data.
1323 # This is intended to be used as a label for the display data
1324 # when viewed in a dax monitoring system.
1325 &quot;shortStrValue&quot;: &quot;A String&quot;, # A possible additional shorter value to display.
1326 # For example a java_class_name_value of com.mypackage.MyDoFn
1327 # will be stored with MyDoFn as the short_str_value and
1328 # com.mypackage.MyDoFn as the java_class_name value.
1329 # short_str_value can be displayed and java_class_name_value
1330 # will be displayed as a tooltip.
1331 &quot;url&quot;: &quot;A String&quot;, # An optional full URL.
1332 &quot;label&quot;: &quot;A String&quot;, # An optional label to display in a dax UI for the element.
1333 },
1334 ],
1335 &quot;outputCollectionName&quot;: [ # User names for all collection outputs to this transform.
1336 &quot;A String&quot;,
1337 ],
1338 },
1339 ],
1340 &quot;displayData&quot;: [ # Pipeline level display data.
1341 { # Data provided with a pipeline or transform to provide descriptive info.
1342 &quot;timestampValue&quot;: &quot;A String&quot;, # Contains value if the data is of timestamp type.
1343 &quot;boolValue&quot;: True or False, # Contains value if the data is of a boolean type.
1344 &quot;javaClassValue&quot;: &quot;A String&quot;, # Contains value if the data is of java class type.
1345 &quot;strValue&quot;: &quot;A String&quot;, # Contains value if the data is of string type.
1346 &quot;int64Value&quot;: &quot;A String&quot;, # Contains value if the data is of int64 type.
1347 &quot;durationValue&quot;: &quot;A String&quot;, # Contains value if the data is of duration type.
1348 &quot;namespace&quot;: &quot;A String&quot;, # The namespace for the key. This is usually a class name or programming
1349 # language namespace (i.e. python module) which defines the display data.
1350 # This allows a dax monitoring system to specially handle the data
1351 # and perform custom rendering.
1352 &quot;floatValue&quot;: 3.14, # Contains value if the data is of float type.
1353 &quot;key&quot;: &quot;A String&quot;, # The key identifying the display data.
1354 # This is intended to be used as a label for the display data
1355 # when viewed in a dax monitoring system.
1356 &quot;shortStrValue&quot;: &quot;A String&quot;, # A possible additional shorter value to display.
1357 # For example a java_class_name_value of com.mypackage.MyDoFn
1358 # will be stored with MyDoFn as the short_str_value and
1359 # com.mypackage.MyDoFn as the java_class_name value.
1360 # short_str_value can be displayed and java_class_name_value
1361 # will be displayed as a tooltip.
1362 &quot;url&quot;: &quot;A String&quot;, # An optional full URL.
1363 &quot;label&quot;: &quot;A String&quot;, # An optional label to display in a dax UI for the element.
1364 },
1365 ],
1366 },
1367 &quot;replaceJobId&quot;: &quot;A String&quot;, # If this job is an update of an existing job, this field is the job ID
1368 # of the job it replaced.
1369 #
1370 # When sending a `CreateJobRequest`, you can update a job by specifying it
1371 # here. The job named here is stopped, and its intermediate state is
1372 # transferred to this job.
1373 &quot;tempFiles&quot;: [ # A set of files the system should be aware of that are used
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001374 # for temporary storage. These temporary files will be
1375 # removed on job completion.
1376 # No duplicates are allowed.
1377 # No file patterns are supported.
1378 #
1379 # The supported files are:
1380 #
1381 # Google Cloud Storage:
1382 #
1383 # storage.googleapis.com/{bucket}/{object}
1384 # bucket.storage.googleapis.com/{object}
Bu Sun Kim65020912020-05-20 12:08:20 -07001385 &quot;A String&quot;,
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001386 ],
Bu Sun Kim65020912020-05-20 12:08:20 -07001387 &quot;name&quot;: &quot;A String&quot;, # The user-specified Cloud Dataflow job name.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001388 #
1389 # Only one Job with a given name may exist in a project at any
1390 # given time. If a caller attempts to create a Job with the same
1391 # name as an already-existing Job, the attempt returns the
1392 # existing Job.
1393 #
1394 # The name must match the regular expression
1395 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
Bu Sun Kim65020912020-05-20 12:08:20 -07001396 &quot;steps&quot;: [ # Exactly one of step or steps_location should be specified.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001397 #
1398 # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001399 { # Defines a particular step within a Cloud Dataflow job.
1400 #
1401 # A job consists of multiple steps, each of which performs some
1402 # specific operation as part of the overall job. Data is typically
1403 # passed from one step to another as part of the job.
1404 #
Bu Sun Kim65020912020-05-20 12:08:20 -07001405 # Here&#x27;s an example of a sequence of steps which together implement a
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001406 # Map-Reduce job:
1407 #
1408 # * Read a collection of data from some source, parsing the
Bu Sun Kim65020912020-05-20 12:08:20 -07001409 # collection&#x27;s elements.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001410 #
1411 # * Validate the elements.
1412 #
1413 # * Apply a user-defined function to map each element to some value
1414 # and extract an element-specific key value.
1415 #
1416 # * Group elements with the same key into a single element with
1417 # that key, transforming a multiply-keyed collection into a
1418 # uniquely-keyed collection.
1419 #
1420 # * Write the elements out to some data sink.
1421 #
1422 # Note that the Cloud Dataflow service may be used to run many different
1423 # types of jobs, not just Map-Reduce.
Bu Sun Kim65020912020-05-20 12:08:20 -07001424 &quot;name&quot;: &quot;A String&quot;, # The name that identifies the step. This must be unique for each
Dan O'Mearadd494642020-05-01 07:42:23 -07001425 # step with respect to all other steps in the Cloud Dataflow job.
Bu Sun Kim65020912020-05-20 12:08:20 -07001426 &quot;kind&quot;: &quot;A String&quot;, # The kind of step in the Cloud Dataflow job.
1427 &quot;properties&quot;: { # Named properties associated with the step. Each kind of
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001428 # predefined step has its own required set of properties.
1429 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Bu Sun Kim65020912020-05-20 12:08:20 -07001430 &quot;a_key&quot;: &quot;&quot;, # Properties of the object.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001431 },
1432 },
1433 ],
Bu Sun Kim65020912020-05-20 12:08:20 -07001434 &quot;replacedByJobId&quot;: &quot;A String&quot;, # If another job is an update of this job (and thus, this job is in
1435 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1436 &quot;executionInfo&quot;: { # Deprecated. Additional information about how a Cloud Dataflow job will be executed that
1437 # isn&#x27;t contained in the submitted job.
1438 &quot;stages&quot;: { # A mapping from each stage to the information about that stage.
1439 &quot;a_key&quot;: { # Contains information about how a particular
1440 # google.dataflow.v1beta3.Step will be executed.
1441 &quot;stepName&quot;: [ # The steps associated with the execution stage.
1442 # Note that stages may have several steps, and that a given step
1443 # might be run by more than one stage.
1444 &quot;A String&quot;,
1445 ],
1446 },
1447 },
1448 },
1449 &quot;currentState&quot;: &quot;A String&quot;, # The current state of the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001450 #
1451 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1452 # specified.
1453 #
1454 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1455 # terminal state. After a job has reached a terminal state, no
1456 # further state updates may be made.
1457 #
1458 # This field may be mutated by the Cloud Dataflow service;
1459 # callers cannot mutate it.
Bu Sun Kim65020912020-05-20 12:08:20 -07001460 &quot;location&quot;: &quot;A String&quot;, # The [regional endpoint]
1461 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1462 # contains this job.
1463 &quot;startTime&quot;: &quot;A String&quot;, # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1464 # Flexible resource scheduling jobs are started with some delay after job
1465 # creation, so start_time is unset before start and is updated when the
1466 # job is started by the Cloud Dataflow service. For other jobs, start_time
1467 # is always equal to create_time and is immutable and set by the Cloud Dataflow
1468 # service.
1469 &quot;stepsLocation&quot;: &quot;A String&quot;, # The GCS location where the steps are stored.
1470 &quot;labels&quot;: { # User-defined labels for this job.
1471 #
1472 # The labels map can contain no more than 64 entries. Entries of the labels
1473 # map are UTF8 strings that comply with the following restrictions:
1474 #
1475 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1476 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1477 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
1478 # size.
1479 &quot;a_key&quot;: &quot;A String&quot;,
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001480 },
Bu Sun Kim65020912020-05-20 12:08:20 -07001481 &quot;createTime&quot;: &quot;A String&quot;, # The timestamp when the job was initially created. Immutable and set by the
1482 # Cloud Dataflow service.
1483 &quot;requestedState&quot;: &quot;A String&quot;, # The job&#x27;s requested state.
1484 #
1485 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1486 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1487 # also be used to directly set a job&#x27;s requested state to
1488 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1489 # job if it has not already reached a terminal state.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001490 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001491 }</pre>
1492</div>
1493
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001494</body></html>