<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>
<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Get the template associated with a template.</p>
<p class="toc_element">
  <code><a href="#launch">launch(projectId, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</a></code></p>
<p class="firstline">Launch a template.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
  <pre>Creates a Cloud Dataflow job from a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # A request to create a Cloud Dataflow job from a template.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
          # the service will use the network "default".
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "additionalUserLabels": { # Additional user labels to be specified for the job.
          # Keys and values should follow the restrictions specified in the [labeling
          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
          # page.
        "a_key": "A String",
      },
      "additionalExperiments": [ # Additional experiment flags for the job.
        "A String",
      ],
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "numWorkers": 42, # The initial number of Google Compute Engine instances for the job.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
          # the form "regions/REGION/subnetworks/SUBNETWORK".
    },
    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
        # create the job.
        # Must be a valid Cloud Storage URL, beginning with `gs://`.
    "location": "A String", # The [regional endpoint]
        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
        # which to direct the request.
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "labels": { # User-defined labels for this job.
147 #
148 # The labels map can contain no more than 64 entries. Entries of the labels
149 # map are UTF8 strings that comply with the following restrictions:
150 #
151 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
152 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
153 # * Both keys and values are additionally constrained to be <= 128 bytes in
154 # size.
155 "a_key": "A String",
156 },
157 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
158 # by the metadata values provided here. Populated for ListJobs and all GetJob
159 # views SUMMARY and higher.
160 # ListJob response and Job SUMMARY view.
161 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
162 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
163 "version": "A String", # The version of the SDK used to run the job.
164 "sdkSupportStatus": "A String", # The support status for this SDK version.
165 },
166 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
167 { # Metadata for a PubSub connector used by the job.
168 "topic": "A String", # Topic accessed in the connection.
169 "subscription": "A String", # Subscription used in the connection.
170 },
171 ],
172 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
173 { # Metadata for a Datastore connector used by the job.
174 "projectId": "A String", # ProjectId accessed in the connection.
175 "namespace": "A String", # Namespace used in the connection.
176 },
177 ],
178 "fileDetails": [ # Identification of a File source used in the Dataflow job.
179 { # Metadata for a File connector used by the job.
180 "filePattern": "A String", # File Pattern used to access files by the connector.
181 },
182 ],
183 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
184 { # Metadata for a Spanner connector used by the job.
185 "instanceId": "A String", # InstanceId accessed in the connection.
186 "projectId": "A String", # ProjectId accessed in the connection.
187 "databaseId": "A String", # DatabaseId accessed in the connection.
188 },
189 ],
190 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
191 { # Metadata for a BigTable connector used by the job.
192 "instanceId": "A String", # InstanceId accessed in the connection.
193 "projectId": "A String", # ProjectId accessed in the connection.
194 "tableId": "A String", # TableId accessed in the connection.
195 },
196 ],
197 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
198 { # Metadata for a BigQuery connector used by the job.
199 "projectId": "A String", # Project accessed in the connection.
200 "dataset": "A String", # Dataset accessed in the connection.
201 "table": "A String", # Table accessed in the connection.
202 "query": "A String", # Query used to access data in the connection.
203 },
204 ],
205 },
206 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
207 # A description of the user pipeline and stages through which it is executed.
208 # Created by Cloud Dataflow service. Only retrieved with
209 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
210 # form. This data is provided by the Dataflow service for ease of visualizing
211 # the pipeline and interpreting Dataflow provided metrics.
212 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
213 { # Description of the type, names/ids, and input/outputs for a transform.
214 "kind": "A String", # Type of transform.
215 "name": "A String", # User provided name for this transform instance.
216 "inputCollectionName": [ # User names for all collection inputs to this transform.
217 "A String",
218 ],
219 "displayData": [ # Transform-specific display data.
220 { # Data provided with a pipeline or transform to provide descriptive info.
221 "shortStrValue": "A String", # A possible additional shorter value to display.
222 # For example a java_class_name_value of com.mypackage.MyDoFn
223 # will be stored with MyDoFn as the short_str_value and
224 # com.mypackage.MyDoFn as the java_class_name value.
225 # short_str_value can be displayed and java_class_name_value
226 # will be displayed as a tooltip.
227 "durationValue": "A String", # Contains value if the data is of duration type.
228 "url": "A String", # An optional full URL.
229 "floatValue": 3.14, # Contains value if the data is of float type.
230 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
231 # language namespace (i.e. python module) which defines the display data.
232 # This allows a dax monitoring system to specially handle the data
233 # and perform custom rendering.
234 "javaClassValue": "A String", # Contains value if the data is of java class type.
235 "label": "A String", # An optional label to display in a dax UI for the element.
236 "boolValue": True or False, # Contains value if the data is of a boolean type.
237 "strValue": "A String", # Contains value if the data is of string type.
238 "key": "A String", # The key identifying the display data.
239 # This is intended to be used as a label for the display data
240 # when viewed in a dax monitoring system.
241 "int64Value": "A String", # Contains value if the data is of int64 type.
242 "timestampValue": "A String", # Contains value if the data is of timestamp type.
243 },
244 ],
245 "outputCollectionName": [ # User names for all collection outputs to this transform.
246 "A String",
247 ],
248 "id": "A String", # SDK generated id of this transform instance.
249 },
250 ],
251 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
252 { # Description of the composing transforms, names/ids, and input/outputs of a
253 # stage of execution. Some composing transforms and sources may have been
254 # generated by the Dataflow service during execution planning.
255 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
256 { # Description of an interstitial value between transforms in an execution
257 # stage.
258 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
259 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
260 # source is most closely associated.
261 "name": "A String", # Dataflow service generated name for this source.
262 },
263 ],
            "kind": "A String", # Type of transform this stage is executing.
265 "name": "A String", # Dataflow service generated name for this stage.
266 "outputSource": [ # Output sources for this stage.
267 { # Description of an input or output of an execution stage.
268 "userName": "A String", # Human-readable name for this source; may be user or system generated.
269 "sizeBytes": "A String", # Size of the source, if measurable.
270 "name": "A String", # Dataflow service generated name for this source.
271 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
272 # source is most closely associated.
273 },
274 ],
275 "inputSource": [ # Input sources for this stage.
276 { # Description of an input or output of an execution stage.
277 "userName": "A String", # Human-readable name for this source; may be user or system generated.
278 "sizeBytes": "A String", # Size of the source, if measurable.
279 "name": "A String", # Dataflow service generated name for this source.
280 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
281 # source is most closely associated.
282 },
283 ],
284 "componentTransform": [ # Transforms that comprise this execution stage.
285 { # Description of a transform executed as part of an execution stage.
286 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
287 "originalTransform": "A String", # User name for the original user transform with which this transform is
288 # most closely associated.
289 "name": "A String", # Dataflow service generated name for this source.
290 },
291 ],
292 "id": "A String", # Dataflow service generated id for this stage.
293 },
294 ],
295 "displayData": [ # Pipeline level display data.
296 { # Data provided with a pipeline or transform to provide descriptive info.
297 "shortStrValue": "A String", # A possible additional shorter value to display.
298 # For example a java_class_name_value of com.mypackage.MyDoFn
299 # will be stored with MyDoFn as the short_str_value and
300 # com.mypackage.MyDoFn as the java_class_name value.
301 # short_str_value can be displayed and java_class_name_value
302 # will be displayed as a tooltip.
303 "durationValue": "A String", # Contains value if the data is of duration type.
304 "url": "A String", # An optional full URL.
305 "floatValue": 3.14, # Contains value if the data is of float type.
306 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
307 # language namespace (i.e. python module) which defines the display data.
308 # This allows a dax monitoring system to specially handle the data
309 # and perform custom rendering.
310 "javaClassValue": "A String", # Contains value if the data is of java class type.
311 "label": "A String", # An optional label to display in a dax UI for the element.
312 "boolValue": True or False, # Contains value if the data is of a boolean type.
313 "strValue": "A String", # Contains value if the data is of string type.
314 "key": "A String", # The key identifying the display data.
315 # This is intended to be used as a label for the display data
316 # when viewed in a dax monitoring system.
317 "int64Value": "A String", # Contains value if the data is of int64 type.
318 "timestampValue": "A String", # Contains value if the data is of timestamp type.
319 },
320 ],
321 },
322 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
323 # callers cannot mutate it.
324 { # A message describing the state of a particular execution stage.
325 "executionStageName": "A String", # The name of the execution stage.
          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
327 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
328 },
329 ],
330 "id": "A String", # The unique ID of this job.
331 #
332 # This field is set by the Cloud Dataflow service when the Job is
333 # created, and is immutable for the life of the job.
334 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
335 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
336 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
337 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
338 # corresponding name prefixes of the new job.
339 "a_key": "A String",
340 },
341 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
342 "version": { # A structure describing which components and their versions of the service
343 # are required in order to run the job.
344 "a_key": "", # Properties of the object.
345 },
346 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
347 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
348 # at rest, AKA a Customer Managed Encryption Key (CMEK).
            #
            # Format:
351 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
352 "internalExperiments": { # Experimental settings.
353 "a_key": "", # Properties of the object. Contains field @type with type URL.
354 },
355 "dataset": "A String", # The dataset for the current project where various workflow
356 # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
361 # bigquery.googleapis.com/{dataset}
362 "experiments": [ # The list of experiments to enable.
363 "A String",
364 ],
365 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
366 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
367 # options are passed through the service and are used to recreate the
368 # SDK pipeline options on the worker in a language agnostic and platform
369 # independent way.
370 "a_key": "", # Properties of the object.
371 },
372 "userAgent": { # A description of the process that generated the request.
373 "a_key": "", # Properties of the object.
374 },
375 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
376 # unspecified, the service will attempt to choose a reasonable
377 # default. This should be in the form of the API service name,
378 # e.g. "compute.googleapis.com".
379 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
380 # specified in order for the job to have workers.
381 { # Describes one particular pool of Cloud Dataflow workers to be
382 # instantiated by the Cloud Dataflow service in order to perform the
383 # computations required by a job. Note that a workflow job may use
384 # multiple pools, in order to match the various computational
385 # requirements of the various stages of the job.
386 "diskSourceImage": "A String", # Fully qualified source image for disks.
387 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
388 # using the standard Dataflow task runner. Users should ignore
389 # this field.
390 "workflowFileName": "A String", # The file to store the workflow in.
391 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
392 # will not be uploaded.
393 #
394 # The supported resource type is:
395 #
396 # Google Cloud Storage:
397 # storage.googleapis.com/{bucket}/{object}
398 # bucket.storage.googleapis.com/{object}
399 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
400 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
401 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
402 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
403 # "shuffle/v1beta1".
404 "workerId": "A String", # The ID of the worker running this pipeline.
405 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
406 #
407 # When workers access Google Cloud APIs, they logically do so via
408 # relative URLs. If this field is specified, it supplies the base
409 # URL to use for resolving these relative URLs. The normative
410 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
411 # Locators".
412 #
413 # If not specified, the default value is "http://www.googleapis.com/"
414 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
415 # "dataflow/v1b3/projects".
416 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
417 # storage.
418 #
419 # The supported resource type is:
420 #
421 # Google Cloud Storage:
422 #
423 # storage.googleapis.com/{bucket}/{object}
424 # bucket.storage.googleapis.com/{object}
425 },
426 "vmId": "A String", # The ID string of the VM.
427 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
428 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
429 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
430 # access the Cloud Dataflow API.
431 "A String",
432 ],
433 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
434 # taskrunner; e.g. "root".
435 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
436 #
437 # When workers access Google Cloud APIs, they logically do so via
438 # relative URLs. If this field is specified, it supplies the base
439 # URL to use for resolving these relative URLs. The normative
440 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
441 # Locators".
442 #
443 # If not specified, the default value is "http://www.googleapis.com/"
444 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
445 # taskrunner; e.g. "wheel".
446 "languageHint": "A String", # The suggested backend language.
447 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
448 # console.
449 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
450 "logDir": "A String", # The directory on the VM to store logs.
451 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
452 "harnessCommand": "A String", # The command to launch the worker harness.
453 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
454 # temporary storage.
455 #
456 # The supported resource type is:
457 #
458 # Google Cloud Storage:
459 # storage.googleapis.com/{bucket}/{object}
460 # bucket.storage.googleapis.com/{object}
461 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
462 },
463 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
464 # are supported.
465 "packages": [ # Packages to be installed on workers.
466 { # The packages that must be installed in order for a worker to run the
467 # steps of the Cloud Dataflow job that will be assigned to its worker
468 # pool.
469 #
470 # This is the mechanism by which the Cloud Dataflow SDK causes code to
471 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
472 # might use this to install jars containing the user's code and all of the
473 # various dependencies (libraries, data files, etc.) required in order
474 # for that code to run.
475 "location": "A String", # The resource to read the package from. The supported resource type is:
476 #
477 # Google Cloud Storage:
478 #
479 # storage.googleapis.com/{bucket}
480 # bucket.storage.googleapis.com/
481 "name": "A String", # The name of the package.
482 },
483 ],
484 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
485 # service will attempt to choose a reasonable default.
486 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
487 # the service will use the network "default".
488 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
489 # will attempt to choose a reasonable default.
490 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
491 # attempt to choose a reasonable default.
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
493 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
494 # `TEARDOWN_NEVER`.
495 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
496 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
497 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
498 # down.
499 #
500 # If the workers are not torn down by the service, they will
501 # continue to run and use Google Compute Engine VM resources in the
502 # user's project until they are explicitly terminated by the user.
503 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
504 # policy except for small, manually supervised test jobs.
505 #
506 # If unknown or unspecified, the service will attempt to choose a reasonable
507 # default.
508 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
509 # Compute Engine API.
510 "ipConfiguration": "A String", # Configuration for VM IPs.
511 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
512 # service will choose a number of threads (according to the number of cores
513 # on the selected machine type for batch, or 1 by convention for streaming).
514 "poolArgs": { # Extra arguments for this worker pool.
515 "a_key": "", # Properties of the object. Contains field @type with type URL.
516 },
517 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
518 # execute the job. If zero or unspecified, the service will
519 # attempt to choose a reasonable default.
520 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
521 # harness, residing in Google Container Registry.
522 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
523 # the form "regions/REGION/subnetworks/SUBNETWORK".
524 "dataDisks": [ # Data disks that are used by a VM in this workflow.
525 { # Describes the data disk used by a workflow job.
526 "mountPoint": "A String", # Directory in a VM where disk is mounted.
527 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
528 # attempt to choose a reasonable default.
529 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
530 # must be a disk type appropriate to the project and zone in which
531 # the workers will run. If unknown or unspecified, the service
532 # will attempt to choose a reasonable default.
533 #
534 # For example, the standard persistent disk type is a resource name
535 # typically ending in "pd-standard". If SSD persistent disks are
536 # available, the resource name typically ends with "pd-ssd". The
537 # actual valid values are defined the Google Compute Engine API,
538 # not by the Cloud Dataflow API; consult the Google Compute Engine
539 # documentation for more information about determining the set of
540 # available disk types for a particular project and zone.
541 #
542 # Google Compute Engine Disk types are local to a particular
543 # project in a particular zone, and so the resource name will
544 # typically look something like this:
545 #
546 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
547 },
548 ],
549 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
550 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
551 "algorithm": "A String", # The algorithm to use for autoscaling.
552 },
553 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
554 # select a default set of packages which are useful to worker
555 # harnesses written in a particular language.
556 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
557 # attempt to choose a reasonable default.
558 "metadata": { # Metadata to set on the Google Compute Engine VMs.
559 "a_key": "A String",
560 },
561 },
562 ],
563 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
564 # storage. The system will append the suffix "/temp-{JOBNAME} to
565 # this resource prefix, where {JOBNAME} is the value of the
566 # job_name field. The resulting bucket and object prefix is used
567 # as the prefix of the resources used to store temporary data
568 # needed during the job execution. NOTE: This will override the
569 # value in taskrunner_settings.
570 # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            # storage.googleapis.com/{bucket}/{object}
575 # bucket.storage.googleapis.com/{object}
576 },
577 "location": "A String", # The [regional endpoint]
578 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
579 # contains this job.
580 "tempFiles": [ # A set of files the system should be aware of that are used
581 # for temporary storage. These temporary files will be
582 # removed on job completion.
583 # No duplicates are allowed.
584 # No file patterns are supported.
585 #
586 # The supported files are:
587 #
588 # Google Cloud Storage:
589 #
590 # storage.googleapis.com/{bucket}/{object}
591 # bucket.storage.googleapis.com/{object}
592 "A String",
593 ],
594 "type": "A String", # The type of Cloud Dataflow job.
595 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
596 # If this field is set, the service will ensure its uniqueness.
597 # The request to create a job will fail if the service has knowledge of a
598 # previously submitted job with the same client's ID and job name.
599 # The caller may use this field to ensure idempotence of job
600 # creation across retried attempts to create a job.
601 # By default, the field is empty and, in that case, the service ignores it.
602 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
603 # snapshot.
604 "stepsLocation": "A String", # The GCS location where the steps are stored.
605 "currentStateTime": "A String", # The timestamp associated with the current state.
606 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
607 # Flexible resource scheduling jobs are started with some delay after job
608 # creation, so start_time is unset before start and is updated when the
609 # job is started by the Cloud Dataflow service. For other jobs, start_time
610 # always equals to create_time and is immutable and set by the Cloud Dataflow
611 # service.
612 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
613 # Cloud Dataflow service.
614 "requestedState": "A String", # The job's requested state.
615 #
616 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
617 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
618 # also be used to directly set a job's requested state to
619 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
620 # job if it has not already reached a terminal state.
621 "name": "A String", # The user-specified Cloud Dataflow job name.
622 #
623 # Only one Job with a given name may exist in a project at any
624 # given time. If a caller attempts to create a Job with the same
625 # name as an already-existing Job, the attempt returns the
626 # existing Job.
627 #
628 # The name must match the regular expression
629 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
630 "steps": [ # Exactly one of step or steps_location should be specified.
631 #
632 # The top-level steps that constitute the entire job.
633 { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
636 # specific operation as part of the overall job. Data is typically
637 # passed from one step to another as part of the job.
638 #
639 # Here's an example of a sequence of steps which together implement a
640 # Map-Reduce job:
641 #
642 # * Read a collection of data from some source, parsing the
643 # collection's elements.
644 #
645 # * Validate the elements.
646 #
647 # * Apply a user-defined function to map each element to some value
648 # and extract an element-specific key value.
649 #
650 # * Group elements with the same key into a single element with
651 # that key, transforming a multiply-keyed collection into a
652 # uniquely-keyed collection.
653 #
654 # * Write the elements out to some data sink.
655 #
656 # Note that the Cloud Dataflow service may be used to run many different
657 # types of jobs, not just Map-Reduce.
658 "kind": "A String", # The kind of step in the Cloud Dataflow job.
659 "properties": { # Named properties associated with the step. Each kind of
660 # predefined step has its own required set of properties.
661 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
662 "a_key": "", # Properties of the object.
663 },
664 "name": "A String", # The name that identifies the step. This must be unique for each
665 # step with respect to all other steps in the Cloud Dataflow job.
666 },
667 ],
668 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
669 # of the job it replaced.
670 #
671 # When sending a `CreateJobRequest`, you can update a job by specifying it
672 # here. The job named here is stopped, and its intermediate state is
673 # transferred to this job.
674 "currentState": "A String", # The current state of the job.
675 #
676 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
677 # specified.
678 #
679 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
680 # terminal state. After a job has reached a terminal state, no
681 # further state updates may be made.
682 #
683 # This field may be mutated by the Cloud Dataflow service;
684 # callers cannot mutate it.
685 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
686 # isn't contained in the submitted job.
687 "stages": { # A mapping from each stage to the information about that stage.
688 "a_key": { # Contains information about how a particular
689 # google.dataflow.v1beta3.Step will be executed.
690 "stepName": [ # The steps associated with the execution stage.
691 # Note that stages may have several steps, and that a given step
692 # might be run by more than one stage.
693 "A String",
694 ],
695 },
696 },
697 },
    }</pre>
</div>
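<p>For orientation, the following is a minimal, hypothetical sketch of calling this create() method through the google-api-python-client library. The project ID, Cloud Storage paths, and template parameters are placeholders, and Application Default Credentials are assumed to be configured; the generated reference above remains the authoritative description of the request and response bodies.</p>
<pre>
from googleapiclient.discovery import build

# Build the Dataflow v1b3 client (uses Application Default Credentials).
dataflow = build('dataflow', 'v1b3')

# Create a job from a template stored in Cloud Storage.
response = dataflow.projects().templates().create(
    projectId='my-project',                               # placeholder project ID
    body={
        'jobName': 'wordcount-from-template',             # required job name
        'gcsPath': 'gs://my-bucket/templates/wordcount',  # placeholder template path
        'parameters': {'inputFile': 'gs://my-bucket/input.txt'},
        'environment': {'tempLocation': 'gs://my-bucket/temp'},
    },
).execute()

print(response['id'], response.get('currentState'))
</pre>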

<div class="method">
    <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
  <pre>Get the template associated with a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  gcsPath: string, Required. A Cloud Storage path to the template from which to
create the job.
Must be a valid Cloud Storage URL, beginning with 'gs://'.
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
which to direct the request.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The view to retrieve. Defaults to METADATA_ONLY.

Returns:
  An object of the form:

    { # The response to a GetTemplate request.
      "status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
          # request will be indicated in the error_details.
          # different programming environments, including REST APIs and RPC APIs. It is
          # used by [gRPC](https://github.com/grpc). The error model is designed to be:
          #
          # - Simple to use and understand for most users
          # - Flexible enough to meet unexpected needs
          #
          # # Overview
          #
          # The `Status` message contains three pieces of data: error code, error
          # message, and error details. The error code should be an enum value of
          # google.rpc.Code, but it may accept additional error codes if needed. The
          # error message should be a developer-facing English message that helps
          # developers *understand* and *resolve* the error. If a localized user-facing
          # error message is needed, put the localized message in the error details or
          # localize it in the client. The optional error details may contain arbitrary
          # information about the error. There is a predefined set of error detail types
          # in the package `google.rpc` that can be used for common error conditions.
          #
          # # Language mapping
          #
          # The `Status` message is the logical representation of the error model, but it
          # is not necessarily the actual wire format. When the `Status` message is
          # exposed in different client libraries and different wire protocols, it can be
          # mapped differently. For example, it will likely be mapped to some exceptions
          # in Java, but more likely mapped to some error codes in C.
          #
          # # Other uses
          #
          # The error model and the `Status` message can be used in a variety of
          # environments, either with or without APIs, to provide a
          # consistent developer experience across different environments.
          #
          # Example uses of this error model include:
          #
          # - Partial errors. If a service needs to return partial errors to the client,
          #     it may embed the `Status` in the normal response to indicate the partial
          #     errors.
          #
          # - Workflow errors. A typical workflow has multiple steps. Each step may
          #     have a `Status` message for error reporting.
          #
          # - Batch operations. If a client uses batch request and batch response, the
          #     `Status` message should be used directly inside batch response, one for
          #     each error sub-response.
          #
          # - Asynchronous operations. If an API call embeds asynchronous operation
          #     results in its response, the status of those operations should be
          #     represented directly using the `Status` message.
          #
          # - Logging. If some API errors are stored in logs, the message `Status` could
          #     be used directly after any stripping needed for security/privacy reasons.
        "message": "A String", # A developer-facing error message, which should be in English. Any
            # user-facing error message should be localized and sent in the
            # google.rpc.Status.details field, or localized by the client.
        "code": 42, # The status code, which should be an enum value of google.rpc.Code.
        "details": [ # A list of messages that carry the error details. There is a common set of
            # message types for APIs to use.
          {
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
        ],
      },
      "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
          # parameters, etc.
        "name": "A String", # Required. The name of the template.
        "parameters": [ # The parameters for the template.
          { # Metadata for a specific parameter.
            "regexes": [ # Optional. Regexes that the parameter must match.
              "A String",
            ],
            "helpText": "A String", # Required. The help text to display for the parameter.
            "name": "A String", # Required. The name of the parameter.
            "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
            "label": "A String", # Required. The label to display for the parameter.
          },
        ],
        "description": "A String", # Optional. A description of the template.
      },
    }</pre>
</div>
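<p>The following is a small, hypothetical sketch showing how the get() call above and the launch() call documented below might be used together via the google-api-python-client library: first the template metadata is fetched, then a launch is validated without starting a job. All project IDs and Cloud Storage paths are placeholders, and Application Default Credentials are assumed.</p>
<pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')
templates = dataflow.projects().templates()

# Fetch the template metadata (name, description, expected parameters).
metadata = templates.get(
    projectId='my-project',                        # placeholder project ID
    gcsPath='gs://my-bucket/templates/wordcount',  # placeholder template path
    view='METADATA_ONLY',
).execute()
for param in metadata.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText', ''))

# Validate the launch parameters without actually running the job.
dry_run = templates.launch(
    projectId='my-project',
    gcsPath='gs://my-bucket/templates/wordcount',
    validateOnly=True,
    body={'jobName': 'wordcount-dry-run',
          'parameters': {'inputFile': 'gs://my-bucket/input.txt'}},
).execute()
</pre>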

<div class="method">
    <code class="details" id="launch">launch(projectId, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</code>
  <pre>Launch a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Parameters to provide to the template being launched.
    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
          # template if not specified.
      "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
          # the service will use the network "default".
      "zone": "A String", # The Compute Engine [availability
          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
          # for launching worker instances to run your pipeline.
      "additionalUserLabels": { # Additional user labels to be specified for the job.
          # Keys and values should follow the restrictions specified in the [labeling
          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
          # page.
        "a_key": "A String",
      },
      "additionalExperiments": [ # Additional experiment flags for the job.
        "A String",
      ],
      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
          # Use with caution.
      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
          # Must be a valid Cloud Storage URL, beginning with `gs://`.
      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
      "numWorkers": 42, # The initial number of Google Compute Engine instances for the job.
      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
          # available to your pipeline during execution, from 1 to 1000.
      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
          # the form "regions/REGION/subnetworks/SUBNETWORK".
    },
    "parameters": { # The runtime parameters to pass to the job.
      "a_key": "A String",
    },
    "jobName": "A String", # Required. The job name to use for the created job.
  }

  dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
The file must be a JSON-serialized DynamicTemplateFieSpec object.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
which to direct the request.
  gcsPath: string, A Cloud Storage path to the template from which to create
the job.
Must be a valid Cloud Storage URL, beginning with 'gs://'.
  validateOnly: boolean, If true, the request is validated but not actually executed.
Defaults to false.

Returns:
  An object of the form:

    { # Response to the request to launch a template.
      "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
          # the job was successfully launched.
        "labels": { # User-defined labels for this job.
874 #
875 # The labels map can contain no more than 64 entries. Entries of the labels
876 # map are UTF8 strings that comply with the following restrictions:
877 #
878 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
879 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
880 # * Both keys and values are additionally constrained to be <= 128 bytes in
881 # size.
          "a_key": "A String",
        },
        "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
885 # by the metadata values provided here. Populated for ListJobs and all GetJob
886 # views SUMMARY and higher.
887 # ListJob response and Job SUMMARY view.
888 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
889 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
890 "version": "A String", # The version of the SDK used to run the job.
891 "sdkSupportStatus": "A String", # The support status for this SDK version.
892 },
893 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
894 { # Metadata for a PubSub connector used by the job.
895 "topic": "A String", # Topic accessed in the connection.
896 "subscription": "A String", # Subscription used in the connection.
897 },
898 ],
899 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
900 { # Metadata for a Datastore connector used by the job.
901 "projectId": "A String", # ProjectId accessed in the connection.
902 "namespace": "A String", # Namespace used in the connection.
903 },
904 ],
905 "fileDetails": [ # Identification of a File source used in the Dataflow job.
906 { # Metadata for a File connector used by the job.
907 "filePattern": "A String", # File Pattern used to access files by the connector.
908 },
909 ],
910 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
911 { # Metadata for a Spanner connector used by the job.
912 "instanceId": "A String", # InstanceId accessed in the connection.
913 "projectId": "A String", # ProjectId accessed in the connection.
914 "databaseId": "A String", # DatabaseId accessed in the connection.
915 },
916 ],
917 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
918 { # Metadata for a BigTable connector used by the job.
919 "instanceId": "A String", # InstanceId accessed in the connection.
920 "projectId": "A String", # ProjectId accessed in the connection.
921 "tableId": "A String", # TableId accessed in the connection.
922 },
923 ],
924 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
925 { # Metadata for a BigQuery connector used by the job.
926 "projectId": "A String", # Project accessed in the connection.
927 "dataset": "A String", # Dataset accessed in the connection.
928 "table": "A String", # Table accessed in the connection.
929 "query": "A String", # Query used to access data in the connection.
930 },
931 ],
        },
        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
934 # A description of the user pipeline and stages through which it is executed.
935 # Created by Cloud Dataflow service. Only retrieved with
936 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
937 # form. This data is provided by the Dataflow service for ease of visualizing
938 # the pipeline and interpreting Dataflow provided metrics.
939 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
940 { # Description of the type, names/ids, and input/outputs for a transform.
941 "kind": "A String", # Type of transform.
942 "name": "A String", # User provided name for this transform instance.
943 "inputCollectionName": [ # User names for all collection inputs to this transform.
944 "A String",
945 ],
946 "displayData": [ # Transform-specific display data.
947 { # Data provided with a pipeline or transform to provide descriptive info.
948 "shortStrValue": "A String", # A possible additional shorter value to display.
949 # For example a java_class_name_value of com.mypackage.MyDoFn
950 # will be stored with MyDoFn as the short_str_value and
951 # com.mypackage.MyDoFn as the java_class_name value.
952 # short_str_value can be displayed and java_class_name_value
953 # will be displayed as a tooltip.
954 "durationValue": "A String", # Contains value if the data is of duration type.
955 "url": "A String", # An optional full URL.
956 "floatValue": 3.14, # Contains value if the data is of float type.
957 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
958 # language namespace (i.e. python module) which defines the display data.
959 # This allows a dax monitoring system to specially handle the data
960 # and perform custom rendering.
961 "javaClassValue": "A String", # Contains value if the data is of java class type.
962 "label": "A String", # An optional label to display in a dax UI for the element.
963 "boolValue": True or False, # Contains value if the data is of a boolean type.
964 "strValue": "A String", # Contains value if the data is of string type.
965 "key": "A String", # The key identifying the display data.
966 # This is intended to be used as a label for the display data
967 # when viewed in a dax monitoring system.
968 "int64Value": "A String", # Contains value if the data is of int64 type.
969 "timestampValue": "A String", # Contains value if the data is of timestamp type.
970 },
971 ],
972 "outputCollectionName": [ # User names for all collection outputs to this transform.
973 "A String",
974 ],
975 "id": "A String", # SDK generated id of this transform instance.
976 },
977 ],
978 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
979 { # Description of the composing transforms, names/ids, and input/outputs of a
980 # stage of execution. Some composing transforms and sources may have been
981 # generated by the Dataflow service during execution planning.
982 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
983 { # Description of an interstitial value between transforms in an execution
984 # stage.
985 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
986 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
987 # source is most closely associated.
988 "name": "A String", # Dataflow service generated name for this source.
989 },
990 ],
              "kind": "A String", # Type of transform this stage is executing.
992 "name": "A String", # Dataflow service generated name for this stage.
993 "outputSource": [ # Output sources for this stage.
994 { # Description of an input or output of an execution stage.
995 "userName": "A String", # Human-readable name for this source; may be user or system generated.
996 "sizeBytes": "A String", # Size of the source, if measurable.
997 "name": "A String", # Dataflow service generated name for this source.
998 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
999 # source is most closely associated.
1000 },
1001 ],
1002 "inputSource": [ # Input sources for this stage.
1003 { # Description of an input or output of an execution stage.
1004 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1005 "sizeBytes": "A String", # Size of the source, if measurable.
1006 "name": "A String", # Dataflow service generated name for this source.
1007 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1008 # source is most closely associated.
1009 },
1010 ],
1011 "componentTransform": [ # Transforms that comprise this execution stage.
1012 { # Description of a transform executed as part of an execution stage.
1013 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1014 "originalTransform": "A String", # User name for the original user transform with which this transform is
1015 # most closely associated.
1016 "name": "A String", # Dataflow service generated name for this source.
1017 },
1018 ],
1019 "id": "A String", # Dataflow service generated id for this stage.
1020 },
1021 ],
1022 "displayData": [ # Pipeline level display data.
1023 { # Data provided with a pipeline or transform to provide descriptive info.
1024 "shortStrValue": "A String", # A possible additional shorter value to display.
1025 # For example a java_class_name_value of com.mypackage.MyDoFn
1026 # will be stored with MyDoFn as the short_str_value and
1027 # com.mypackage.MyDoFn as the java_class_name value.
1028 # short_str_value can be displayed and java_class_name_value
1029 # will be displayed as a tooltip.
1030 "durationValue": "A String", # Contains value if the data is of duration type.
1031 "url": "A String", # An optional full URL.
1032 "floatValue": 3.14, # Contains value if the data is of float type.
1033 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1034 # language namespace (i.e. python module) which defines the display data.
1035 # This allows a dax monitoring system to specially handle the data
1036 # and perform custom rendering.
1037 "javaClassValue": "A String", # Contains value if the data is of java class type.
1038 "label": "A String", # An optional label to display in a dax UI for the element.
1039 "boolValue": True or False, # Contains value if the data is of a boolean type.
1040 "strValue": "A String", # Contains value if the data is of string type.
1041 "key": "A String", # The key identifying the display data.
1042 # This is intended to be used as a label for the display data
1043 # when viewed in a dax monitoring system.
1044 "int64Value": "A String", # Contains value if the data is of int64 type.
1045 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1046 },
1047 ],
1048 },
1049 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1050 # callers cannot mutate it.
1051 { # A message describing the state of a particular execution stage.
1052 "executionStageName": "A String", # The name of the execution stage.
            "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1054 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1055 },
1056 ],
1057 "id": "A String", # The unique ID of this job.
1058 #
1059 # This field is set by the Cloud Dataflow service when the Job is
1060 # created, and is immutable for the life of the job.
1061 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1062 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1063 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1064 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1065 # corresponding name prefixes of the new job.
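     # For example (hypothetical): a mapping of {"ReadInput": "ReadInputV2"} matches
     # transform names in the old job that start with "ReadInput" to names that
     # start with "ReadInputV2" in the replacement job.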
1066 "a_key": "A String",
1067 },
1068 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1069 "version": { # A structure describing which components and their versions of the service
1070 # are required in order to run the job.
1071 "a_key": "", # Properties of the object.
1072 },
1073 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1074 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1075 # at rest, AKA a Customer Managed Encryption Key (CMEK).
1076 #
1077 # Format:
1078 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1079 "internalExperiments": { # Experimental settings.
1080 "a_key": "", # Properties of the object. Contains field @type with type URL.
1081 },
1082 "dataset": "A String", # The dataset for the current project where various workflow
1083 # related tables are stored.
1084 #
1085 # The supported resource type is:
1086 #
1087 # Google BigQuery:
1088 # bigquery.googleapis.com/{dataset}
1089 "experiments": [ # The list of experiments to enable.
1090 "A String",
1091 ],
1092 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1093 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1094 # options are passed through the service and are used to recreate the
1095 # SDK pipeline options on the worker in a language agnostic and platform
1096 # independent way.
1097 "a_key": "", # Properties of the object.
1098 },
1099 "userAgent": { # A description of the process that generated the request.
1100 "a_key": "", # Properties of the object.
1101 },
1102 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1103 # unspecified, the service will attempt to choose a reasonable
1104 # default. This should be in the form of the API service name,
1105 # e.g. "compute.googleapis.com".
1106 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1107 # specified in order for the job to have workers.
1108 { # Describes one particular pool of Cloud Dataflow workers to be
1109 # instantiated by the Cloud Dataflow service in order to perform the
1110 # computations required by a job. Note that a workflow job may use
1111 # multiple pools, in order to match the various computational
1112 # requirements of the various stages of the job.
1113 "diskSourceImage": "A String", # Fully qualified source image for disks.
1114 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1115 # using the standard Dataflow task runner. Users should ignore
1116 # this field.
1117 "workflowFileName": "A String", # The file to store the workflow in.
1118 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1119 # will not be uploaded.
1120 #
1121 # The supported resource type is:
1122 #
1123 # Google Cloud Storage:
1124 # storage.googleapis.com/{bucket}/{object}
1125 # bucket.storage.googleapis.com/{object}
1126 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1127 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1128 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1129 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1130 # "shuffle/v1beta1".
1131 "workerId": "A String", # The ID of the worker running this pipeline.
1132 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1133 #
1134 # When workers access Google Cloud APIs, they logically do so via
1135 # relative URLs. If this field is specified, it supplies the base
1136 # URL to use for resolving these relative URLs. The normative
1137 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1138 # Locators".
1139 #
1140 # If not specified, the default value is "http://www.googleapis.com/"
1141 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1142 # "dataflow/v1b3/projects".
1143 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1144 # storage.
1145 #
1146 # The supported resource type is:
1147 #
1148 # Google Cloud Storage:
1149 #
1150 # storage.googleapis.com/{bucket}/{object}
1151 # bucket.storage.googleapis.com/{object}
1152 },
1153 "vmId": "A String", # The ID string of the VM.
1154 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1155 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1156 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1157 # access the Cloud Dataflow API.
1158 "A String",
1159 ],
1160 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1161 # taskrunner; e.g. "root".
1162 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1163 #
1164 # When workers access Google Cloud APIs, they logically do so via
1165 # relative URLs. If this field is specified, it supplies the base
1166 # URL to use for resolving these relative URLs. The normative
1167 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1168 # Locators".
1169 #
1170 # If not specified, the default value is "http://www.googleapis.com/"
1171 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1172 # taskrunner; e.g. "wheel".
1173 "languageHint": "A String", # The suggested backend language.
1174 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1175 # console.
1176 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1177 "logDir": "A String", # The directory on the VM to store logs.
1178 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1179 "harnessCommand": "A String", # The command to launch the worker harness.
1180 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1181 # temporary storage.
1182 #
1183 # The supported resource type is:
1184 #
1185 # Google Cloud Storage:
1186 # storage.googleapis.com/{bucket}/{object}
1187 # bucket.storage.googleapis.com/{object}
1188 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1189 },
1190 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1191 # are supported.
1192 "packages": [ # Packages to be installed on workers.
1193 { # The packages that must be installed in order for a worker to run the
1194 # steps of the Cloud Dataflow job that will be assigned to its worker
1195 # pool.
1196 #
1197 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1198 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1199 # might use this to install jars containing the user's code and all of the
1200 # various dependencies (libraries, data files, etc.) required in order
1201 # for that code to run.
1202 "location": "A String", # The resource to read the package from. The supported resource type is:
1203 #
1204 # Google Cloud Storage:
1205 #
1206 # storage.googleapis.com/{bucket}
1207 # bucket.storage.googleapis.com/
1208 "name": "A String", # The name of the package.
1209 },
1210 ],
1211 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1212 # service will attempt to choose a reasonable default.
1213 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1214 # the service will use the network "default".
1215 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1216 # will attempt to choose a reasonable default.
1217 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1218 # attempt to choose a reasonable default.
1219 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1220 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1221 # `TEARDOWN_NEVER`.
1222 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1223 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1224 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1225 # down.
1226 #
1227 # If the workers are not torn down by the service, they will
1228 # continue to run and use Google Compute Engine VM resources in the
1229 # user's project until they are explicitly terminated by the user.
1230 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1231 # policy except for small, manually supervised test jobs.
1232 #
1233 # If unknown or unspecified, the service will attempt to choose a reasonable
1234 # default.
1235 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1236 # Compute Engine API.
1237 "ipConfiguration": "A String", # Configuration for VM IPs.
1238 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1239 # service will choose a number of threads (according to the number of cores
1240 # on the selected machine type for batch, or 1 by convention for streaming).
1241 "poolArgs": { # Extra arguments for this worker pool.
1242 "a_key": "", # Properties of the object. Contains field @type with type URL.
1243 },
1244 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1245 # execute the job. If zero or unspecified, the service will
1246 # attempt to choose a reasonable default.
1247 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1248 # harness, residing in Google Container Registry.
1249 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1250 # the form "regions/REGION/subnetworks/SUBNETWORK".
1251 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1252 { # Describes the data disk used by a workflow job.
1253 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1254 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1255 # attempt to choose a reasonable default.
1256 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1257 # must be a disk type appropriate to the project and zone in which
1258 # the workers will run. If unknown or unspecified, the service
1259 # will attempt to choose a reasonable default.
1260 #
1261 # For example, the standard persistent disk type is a resource name
1262 # typically ending in "pd-standard". If SSD persistent disks are
1263 # available, the resource name typically ends with "pd-ssd". The
1264 # actual valid values are defined by the Google Compute Engine API,
1265 # not by the Cloud Dataflow API; consult the Google Compute Engine
1266 # documentation for more information about determining the set of
1267 # available disk types for a particular project and zone.
1268 #
1269 # Google Compute Engine Disk types are local to a particular
1270 # project in a particular zone, and so the resource name will
1271 # typically look something like this:
1272 #
1273 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1274 },
1275 ],
1276 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1277 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1278 "algorithm": "A String", # The algorithm to use for autoscaling.
1279 },
1280 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1281 # select a default set of packages which are useful to worker
1282 # harnesses written in a particular language.
1283 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1284 # attempt to choose a reasonable default.
1285 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1286 "a_key": "A String",
1287 },
1288 },
1289 ],
1290 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1291 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1292 # this resource prefix, where {JOBNAME} is the value of the
1293 # job_name field. The resulting bucket and object prefix is used
1294 # as the prefix of the resources used to store temporary data
1295 # needed during the job execution. NOTE: This will override the
1296 # value in taskrunner_settings.
1297 # The supported resource type is:
1298 #
1299 # Google Cloud Storage:
1300 #
1301 # storage.googleapis.com/{bucket}/{object}
1302 # bucket.storage.googleapis.com/{object}
1303 },
1304 "location": "A String", # The [regional endpoint]
1305 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1306 # contains this job.
1307 "tempFiles": [ # A set of files the system should be aware of that are used
1308 # for temporary storage. These temporary files will be
1309 # removed on job completion.
1310 # No duplicates are allowed.
1311 # No file patterns are supported.
1312 #
1313 # The supported files are:
1314 #
1315 # Google Cloud Storage:
1316 #
1317 # storage.googleapis.com/{bucket}/{object}
1318 # bucket.storage.googleapis.com/{object}
1319 "A String",
1320 ],
1321 "type": "A String", # The type of Cloud Dataflow job.
1322 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1323 # If this field is set, the service will ensure its uniqueness.
1324 # The request to create a job will fail if the service has knowledge of a
1325 # previously submitted job with the same client's ID and job name.
1326 # The caller may use this field to ensure idempotence of job
1327 # creation across retried attempts to create a job.
1328 # By default, the field is empty and, in that case, the service ignores it.
1329 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1330 # snapshot.
1331 "stepsLocation": "A String", # The GCS location where the steps are stored.
1332 "currentStateTime": "A String", # The timestamp associated with the current state.
1333 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1334 # Flexible resource scheduling jobs are started with some delay after job
1335 # creation, so start_time is unset before start and is updated when the
1336 # job is started by the Cloud Dataflow service. For other jobs, start_time
1337 # always equals create_time and is immutable and set by the Cloud Dataflow
1338 # service.
1339 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1340 # Cloud Dataflow service.
1341 "requestedState": "A String", # The job's requested state.
1342 #
1343 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1344 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1345 # also be used to directly set a job's requested state to
1346 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1347 # job if it has not already reached a terminal state.
1348 "name": "A String", # The user-specified Cloud Dataflow job name.
1349 #
1350 # Only one Job with a given name may exist in a project at any
1351 # given time. If a caller attempts to create a Job with the same
1352 # name as an already-existing Job, the attempt returns the
1353 # existing Job.
1354 #
1355 # The name must match the regular expression
1356 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
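     # For example, a (hypothetical) name such as "my-wordcount-job-01" matches this
     # pattern; "MyJob" does not, because uppercase letters are not allowed.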
1357 "steps": [ # Exactly one of step or steps_location should be specified.
1358 #
1359 # The top-level steps that constitute the entire job.
1360 { # Defines a particular step within a Cloud Dataflow job.
1361 #
1362 # A job consists of multiple steps, each of which performs some
1363 # specific operation as part of the overall job. Data is typically
1364 # passed from one step to another as part of the job.
1365 #
1366 # Here's an example of a sequence of steps which together implement a
1367 # Map-Reduce job:
1368 #
1369 # * Read a collection of data from some source, parsing the
1370 # collection's elements.
1371 #
1372 # * Validate the elements.
1373 #
1374 # * Apply a user-defined function to map each element to some value
1375 # and extract an element-specific key value.
1376 #
1377 # * Group elements with the same key into a single element with
1378 # that key, transforming a multiply-keyed collection into a
1379 # uniquely-keyed collection.
1380 #
1381 # * Write the elements out to some data sink.
1382 #
1383 # Note that the Cloud Dataflow service may be used to run many different
1384 # types of jobs, not just Map-Reduce.
1385 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1386 "properties": { # Named properties associated with the step. Each kind of
1387 # predefined step has its own required set of properties.
1388 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1389 "a_key": "", # Properties of the object.
1390 },
1391 "name": "A String", # The name that identifies the step. This must be unique for each
1392 # step with respect to all other steps in the Cloud Dataflow job.
1393 },
1394 ],
1395 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1396 # of the job it replaced.
1397 #
1398 # When sending a `CreateJobRequest`, you can update a job by specifying it
1399 # here. The job named here is stopped, and its intermediate state is
1400 # transferred to this job.
1401 "currentState": "A String", # The current state of the job.
1402 #
1403 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1404 # specified.
1405 #
1406 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1407 # terminal state. After a job has reached a terminal state, no
1408 # further state updates may be made.
1409 #
1410 # This field may be mutated by the Cloud Dataflow service;
1411 # callers cannot mutate it.
1412 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1413 # isn't contained in the submitted job.
1414 "stages": { # A mapping from each stage to the information about that stage.
1415 "a_key": { # Contains information about how a particular
1416 # google.dataflow.v1beta3.Step will be executed.
1417 "stepName": [ # The steps associated with the execution stage.
1418 # Note that stages may have several steps, and that a given step
1419 # might be run by more than one stage.
1420 "A String",
1421 ],
1422 },
1423 },
1424 },
1425 },
1426 }</pre>
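<p>A minimal, illustrative sketch (not part of the generated reference above) of launching a template job with this client and reading a few fields of the returned job. The project ID, bucket, template path, and job name are placeholders, and Application Default Credentials are assumed.</p>
<pre>
# Illustrative sketch only; names below are placeholders.
from googleapiclient.discovery import build

# Build the Dataflow client (uses Application Default Credentials).
dataflow = build('dataflow', 'v1b3')

response = dataflow.projects().templates().launch(
    projectId='my-project',                          # placeholder project ID
    gcsPath='gs://my-bucket/templates/my-template',  # placeholder template path
    body={
        'jobName': 'example-template-job-01',        # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
        'parameters': {},                            # template-specific parameters, if any
        'environment': {'tempLocation': 'gs://my-bucket/temp'},
    },
).execute()

# The response wraps the Job described above; id and currentState are set by the service.
job = response.get('job', {})
print(job.get('id'), job.get('currentState'))
</pre>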
1427</div>
1428
1429</body></html>