blob: 96d99f62bdf716039f973b4fe624d535f4fc48d9 [file] [log] [blame]
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13}
14
15body {
16 font-size: 13px;
17 padding: 1em;
18}
19
20h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23}
24
25h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28}
29
30h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34}
35
36pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42 margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46 font-family: Arial, sans serif;
47}
48
49h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54 margin-top: 0.5em;
55}
56
57.firstline {
58 margin-left: 2 em;
59}
60
61.method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66}
67
68.details {
69 font-weight: bold;
70 font-size: 14px;
71}
72
73</style>
74
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070075<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040076<h2>Instance Methods</h2>
77<p class="toc_element">
78 <code><a href="#create">create(projectId, location, body, x__xgafv=None)</a></code></p>
79<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
81 <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070084 <code><a href="#launch">launch(projectId, location, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</a></code></p>
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040085<p class="firstline">Launch a template.</p>
86<h3>Method Details</h3>
87<div class="method">
88 <code class="details" id="create">create(projectId, location, body, x__xgafv=None)</code>
89 <pre>Creates a Cloud Dataflow job from a template.
90
91Args:
92 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070093 location: string, The [regional endpoint]
94(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
95which to direct the request. (required)
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040096 body: object, The request body. (required)
97 The object takes the form of:
98
99{ # A request to create a Cloud Dataflow job from a template.
100 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
101 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
102 # template if not specified.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700103 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
104 # the service will use the network "default".
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400105 "zone": "A String", # The Compute Engine [availability
106 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
107 # for launching worker instances to run your pipeline.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700108 "additionalUserLabels": { # Additional user labels to be specified for the job.
109 # Keys and values should follow the restrictions specified in the [labeling
110 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
111 # page.
112 "a_key": "A String",
113 },
114 "additionalExperiments": [ # Additional experiment flags for the job.
115 "A String",
116 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400117 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
118 # Use with caution.
119 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
120 # Must be a valid Cloud Storage URL, beginning with `gs://`.
121 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700122 "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400123 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
124 # available to your pipeline during execution, from 1 to 1000.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700125 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
126 # the form "regions/REGION/subnetworks/SUBNETWORK".
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400127 },
128 "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
129 # create the job.
130 # Must be a valid Cloud Storage URL, beginning with `gs://`.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700131 "location": "A String", # The [regional endpoint]
132 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
133 # which to direct the request.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400134 "parameters": { # The runtime parameters to pass to the job.
135 "a_key": "A String",
136 },
137 "jobName": "A String", # Required. The job name to use for the created job.
138 }
139
140 x__xgafv: string, V1 error format.
141 Allowed values
142 1 - v1 error format
143 2 - v2 error format
144
145Returns:
146 An object of the form:
147
148 { # Defines a job to be run by the Cloud Dataflow service.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700149 "labels": { # User-defined labels for this job.
150 #
151 # The labels map can contain no more than 64 entries. Entries of the labels
152 # map are UTF8 strings that comply with the following restrictions:
153 #
154 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
155 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
156 # * Both keys and values are additionally constrained to be <= 128 bytes in
157 # size.
158 "a_key": "A String",
159 },
160 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
161 # by the metadata values provided here. Populated for ListJobs and all GetJob
162 # views SUMMARY and higher.
163 # ListJob response and Job SUMMARY view.
164 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
165 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
166 "version": "A String", # The version of the SDK used to run the job.
167 "sdkSupportStatus": "A String", # The support status for this SDK version.
168 },
169 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
170 { # Metadata for a PubSub connector used by the job.
171 "topic": "A String", # Topic accessed in the connection.
172 "subscription": "A String", # Subscription used in the connection.
173 },
174 ],
175 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
176 { # Metadata for a Datastore connector used by the job.
177 "projectId": "A String", # ProjectId accessed in the connection.
178 "namespace": "A String", # Namespace used in the connection.
179 },
180 ],
181 "fileDetails": [ # Identification of a File source used in the Dataflow job.
182 { # Metadata for a File connector used by the job.
183 "filePattern": "A String", # File Pattern used to access files by the connector.
184 },
185 ],
186 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
187 { # Metadata for a Spanner connector used by the job.
188 "instanceId": "A String", # InstanceId accessed in the connection.
189 "projectId": "A String", # ProjectId accessed in the connection.
190 "databaseId": "A String", # DatabaseId accessed in the connection.
191 },
192 ],
193 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
194 { # Metadata for a BigTable connector used by the job.
195 "instanceId": "A String", # InstanceId accessed in the connection.
196 "projectId": "A String", # ProjectId accessed in the connection.
197 "tableId": "A String", # TableId accessed in the connection.
198 },
199 ],
200 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
201 { # Metadata for a BigQuery connector used by the job.
202 "projectId": "A String", # Project accessed in the connection.
203 "dataset": "A String", # Dataset accessed in the connection.
204 "table": "A String", # Table accessed in the connection.
205 "query": "A String", # Query used to access data in the connection.
206 },
207 ],
208 },
209 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
210 # A description of the user pipeline and stages through which it is executed.
211 # Created by Cloud Dataflow service. Only retrieved with
212 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
213 # form. This data is provided by the Dataflow service for ease of visualizing
214 # the pipeline and interpreting Dataflow provided metrics.
215 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
216 { # Description of the type, names/ids, and input/outputs for a transform.
217 "kind": "A String", # Type of transform.
218 "name": "A String", # User provided name for this transform instance.
219 "inputCollectionName": [ # User names for all collection inputs to this transform.
220 "A String",
221 ],
222 "displayData": [ # Transform-specific display data.
223 { # Data provided with a pipeline or transform to provide descriptive info.
224 "shortStrValue": "A String", # A possible additional shorter value to display.
225 # For example a java_class_name_value of com.mypackage.MyDoFn
226 # will be stored with MyDoFn as the short_str_value and
227 # com.mypackage.MyDoFn as the java_class_name value.
228 # short_str_value can be displayed and java_class_name_value
229 # will be displayed as a tooltip.
230 "durationValue": "A String", # Contains value if the data is of duration type.
231 "url": "A String", # An optional full URL.
232 "floatValue": 3.14, # Contains value if the data is of float type.
233 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
234 # language namespace (i.e. python module) which defines the display data.
235 # This allows a dax monitoring system to specially handle the data
236 # and perform custom rendering.
237 "javaClassValue": "A String", # Contains value if the data is of java class type.
238 "label": "A String", # An optional label to display in a dax UI for the element.
239 "boolValue": True or False, # Contains value if the data is of a boolean type.
240 "strValue": "A String", # Contains value if the data is of string type.
241 "key": "A String", # The key identifying the display data.
242 # This is intended to be used as a label for the display data
243 # when viewed in a dax monitoring system.
244 "int64Value": "A String", # Contains value if the data is of int64 type.
245 "timestampValue": "A String", # Contains value if the data is of timestamp type.
246 },
247 ],
248 "outputCollectionName": [ # User names for all collection outputs to this transform.
249 "A String",
250 ],
251 "id": "A String", # SDK generated id of this transform instance.
252 },
253 ],
254 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
255 { # Description of the composing transforms, names/ids, and input/outputs of a
256 # stage of execution. Some composing transforms and sources may have been
257 # generated by the Dataflow service during execution planning.
258 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
259 { # Description of an interstitial value between transforms in an execution
260 # stage.
261 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
262 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
263 # source is most closely associated.
264 "name": "A String", # Dataflow service generated name for this source.
265 },
266 ],
267 "kind": "A String", # Type of tranform this stage is executing.
268 "name": "A String", # Dataflow service generated name for this stage.
269 "outputSource": [ # Output sources for this stage.
270 { # Description of an input or output of an execution stage.
271 "userName": "A String", # Human-readable name for this source; may be user or system generated.
272 "sizeBytes": "A String", # Size of the source, if measurable.
273 "name": "A String", # Dataflow service generated name for this source.
274 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
275 # source is most closely associated.
276 },
277 ],
278 "inputSource": [ # Input sources for this stage.
279 { # Description of an input or output of an execution stage.
280 "userName": "A String", # Human-readable name for this source; may be user or system generated.
281 "sizeBytes": "A String", # Size of the source, if measurable.
282 "name": "A String", # Dataflow service generated name for this source.
283 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
284 # source is most closely associated.
285 },
286 ],
287 "componentTransform": [ # Transforms that comprise this execution stage.
288 { # Description of a transform executed as part of an execution stage.
289 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
290 "originalTransform": "A String", # User name for the original user transform with which this transform is
291 # most closely associated.
292 "name": "A String", # Dataflow service generated name for this source.
293 },
294 ],
295 "id": "A String", # Dataflow service generated id for this stage.
296 },
297 ],
298 "displayData": [ # Pipeline level display data.
299 { # Data provided with a pipeline or transform to provide descriptive info.
300 "shortStrValue": "A String", # A possible additional shorter value to display.
301 # For example a java_class_name_value of com.mypackage.MyDoFn
302 # will be stored with MyDoFn as the short_str_value and
303 # com.mypackage.MyDoFn as the java_class_name value.
304 # short_str_value can be displayed and java_class_name_value
305 # will be displayed as a tooltip.
306 "durationValue": "A String", # Contains value if the data is of duration type.
307 "url": "A String", # An optional full URL.
308 "floatValue": 3.14, # Contains value if the data is of float type.
309 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
310 # language namespace (i.e. python module) which defines the display data.
311 # This allows a dax monitoring system to specially handle the data
312 # and perform custom rendering.
313 "javaClassValue": "A String", # Contains value if the data is of java class type.
314 "label": "A String", # An optional label to display in a dax UI for the element.
315 "boolValue": True or False, # Contains value if the data is of a boolean type.
316 "strValue": "A String", # Contains value if the data is of string type.
317 "key": "A String", # The key identifying the display data.
318 # This is intended to be used as a label for the display data
319 # when viewed in a dax monitoring system.
320 "int64Value": "A String", # Contains value if the data is of int64 type.
321 "timestampValue": "A String", # Contains value if the data is of timestamp type.
322 },
323 ],
324 },
325 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
326 # callers cannot mutate it.
327 { # A message describing the state of a particular execution stage.
328 "executionStageName": "A String", # The name of the execution stage.
329 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
330 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
331 },
332 ],
333 "id": "A String", # The unique ID of this job.
334 #
335 # This field is set by the Cloud Dataflow service when the Job is
336 # created, and is immutable for the life of the job.
337 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
338 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
339 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
340 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
341 # corresponding name prefixes of the new job.
342 "a_key": "A String",
343 },
344 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
345 "version": { # A structure describing which components and their versions of the service
346 # are required in order to run the job.
347 "a_key": "", # Properties of the object.
348 },
349 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
350 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
351 # at rest, AKA a Customer Managed Encryption Key (CMEK).
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400352 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700353 # Format:
354 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
355 "internalExperiments": { # Experimental settings.
356 "a_key": "", # Properties of the object. Contains field @type with type URL.
357 },
358 "dataset": "A String", # The dataset for the current project where various workflow
359 # related tables are stored.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400360 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700361 # The supported resource type is:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400362 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700363 # Google BigQuery:
364 # bigquery.googleapis.com/{dataset}
365 "experiments": [ # The list of experiments to enable.
366 "A String",
367 ],
368 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
369 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
370 # options are passed through the service and are used to recreate the
371 # SDK pipeline options on the worker in a language agnostic and platform
372 # independent way.
373 "a_key": "", # Properties of the object.
374 },
375 "userAgent": { # A description of the process that generated the request.
376 "a_key": "", # Properties of the object.
377 },
378 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
379 # unspecified, the service will attempt to choose a reasonable
380 # default. This should be in the form of the API service name,
381 # e.g. "compute.googleapis.com".
382 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
383 # specified in order for the job to have workers.
384 { # Describes one particular pool of Cloud Dataflow workers to be
385 # instantiated by the Cloud Dataflow service in order to perform the
386 # computations required by a job. Note that a workflow job may use
387 # multiple pools, in order to match the various computational
388 # requirements of the various stages of the job.
389 "diskSourceImage": "A String", # Fully qualified source image for disks.
390 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
391 # using the standard Dataflow task runner. Users should ignore
392 # this field.
393 "workflowFileName": "A String", # The file to store the workflow in.
394 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
395 # will not be uploaded.
396 #
397 # The supported resource type is:
398 #
399 # Google Cloud Storage:
400 # storage.googleapis.com/{bucket}/{object}
401 # bucket.storage.googleapis.com/{object}
402 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
403 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
404 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
405 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
406 # "shuffle/v1beta1".
407 "workerId": "A String", # The ID of the worker running this pipeline.
408 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
409 #
410 # When workers access Google Cloud APIs, they logically do so via
411 # relative URLs. If this field is specified, it supplies the base
412 # URL to use for resolving these relative URLs. The normative
413 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
414 # Locators".
415 #
416 # If not specified, the default value is "http://www.googleapis.com/"
417 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
418 # "dataflow/v1b3/projects".
419 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
420 # storage.
421 #
422 # The supported resource type is:
423 #
424 # Google Cloud Storage:
425 #
426 # storage.googleapis.com/{bucket}/{object}
427 # bucket.storage.googleapis.com/{object}
428 },
429 "vmId": "A String", # The ID string of the VM.
430 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
431 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
432 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
433 # access the Cloud Dataflow API.
434 "A String",
435 ],
436 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
437 # taskrunner; e.g. "root".
438 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
439 #
440 # When workers access Google Cloud APIs, they logically do so via
441 # relative URLs. If this field is specified, it supplies the base
442 # URL to use for resolving these relative URLs. The normative
443 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
444 # Locators".
445 #
446 # If not specified, the default value is "http://www.googleapis.com/"
447 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
448 # taskrunner; e.g. "wheel".
449 "languageHint": "A String", # The suggested backend language.
450 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
451 # console.
452 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
453 "logDir": "A String", # The directory on the VM to store logs.
454 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
455 "harnessCommand": "A String", # The command to launch the worker harness.
456 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
457 # temporary storage.
458 #
459 # The supported resource type is:
460 #
461 # Google Cloud Storage:
462 # storage.googleapis.com/{bucket}/{object}
463 # bucket.storage.googleapis.com/{object}
464 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
465 },
466 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
467 # are supported.
468 "packages": [ # Packages to be installed on workers.
469 { # The packages that must be installed in order for a worker to run the
470 # steps of the Cloud Dataflow job that will be assigned to its worker
471 # pool.
472 #
473 # This is the mechanism by which the Cloud Dataflow SDK causes code to
474 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
475 # might use this to install jars containing the user's code and all of the
476 # various dependencies (libraries, data files, etc.) required in order
477 # for that code to run.
478 "location": "A String", # The resource to read the package from. The supported resource type is:
479 #
480 # Google Cloud Storage:
481 #
482 # storage.googleapis.com/{bucket}
483 # bucket.storage.googleapis.com/
484 "name": "A String", # The name of the package.
485 },
486 ],
487 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
488 # service will attempt to choose a reasonable default.
489 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
490 # the service will use the network "default".
491 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
492 # will attempt to choose a reasonable default.
493 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
494 # attempt to choose a reasonable default.
495 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
496 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
497 # `TEARDOWN_NEVER`.
498 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
499 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
500 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
501 # down.
502 #
503 # If the workers are not torn down by the service, they will
504 # continue to run and use Google Compute Engine VM resources in the
505 # user's project until they are explicitly terminated by the user.
506 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
507 # policy except for small, manually supervised test jobs.
508 #
509 # If unknown or unspecified, the service will attempt to choose a reasonable
510 # default.
511 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
512 # Compute Engine API.
513 "ipConfiguration": "A String", # Configuration for VM IPs.
514 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
515 # service will choose a number of threads (according to the number of cores
516 # on the selected machine type for batch, or 1 by convention for streaming).
517 "poolArgs": { # Extra arguments for this worker pool.
518 "a_key": "", # Properties of the object. Contains field @type with type URL.
519 },
520 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
521 # execute the job. If zero or unspecified, the service will
522 # attempt to choose a reasonable default.
523 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
524 # harness, residing in Google Container Registry.
525 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
526 # the form "regions/REGION/subnetworks/SUBNETWORK".
527 "dataDisks": [ # Data disks that are used by a VM in this workflow.
528 { # Describes the data disk used by a workflow job.
529 "mountPoint": "A String", # Directory in a VM where disk is mounted.
530 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
531 # attempt to choose a reasonable default.
532 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
533 # must be a disk type appropriate to the project and zone in which
534 # the workers will run. If unknown or unspecified, the service
535 # will attempt to choose a reasonable default.
536 #
537 # For example, the standard persistent disk type is a resource name
538 # typically ending in "pd-standard". If SSD persistent disks are
539 # available, the resource name typically ends with "pd-ssd". The
540 # actual valid values are defined the Google Compute Engine API,
541 # not by the Cloud Dataflow API; consult the Google Compute Engine
542 # documentation for more information about determining the set of
543 # available disk types for a particular project and zone.
544 #
545 # Google Compute Engine Disk types are local to a particular
546 # project in a particular zone, and so the resource name will
547 # typically look something like this:
548 #
549 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
550 },
551 ],
552 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
553 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
554 "algorithm": "A String", # The algorithm to use for autoscaling.
555 },
556 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
557 # select a default set of packages which are useful to worker
558 # harnesses written in a particular language.
559 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
560 # attempt to choose a reasonable default.
561 "metadata": { # Metadata to set on the Google Compute Engine VMs.
562 "a_key": "A String",
563 },
564 },
565 ],
566 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
567 # storage. The system will append the suffix "/temp-{JOBNAME} to
568 # this resource prefix, where {JOBNAME} is the value of the
569 # job_name field. The resulting bucket and object prefix is used
570 # as the prefix of the resources used to store temporary data
571 # needed during the job execution. NOTE: This will override the
572 # value in taskrunner_settings.
573 # The supported resource type is:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400574 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700575 # Google Cloud Storage:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400576 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700577 # storage.googleapis.com/{bucket}/{object}
578 # bucket.storage.googleapis.com/{object}
579 },
580 "location": "A String", # The [regional endpoint]
581 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
582 # contains this job.
583 "tempFiles": [ # A set of files the system should be aware of that are used
584 # for temporary storage. These temporary files will be
585 # removed on job completion.
586 # No duplicates are allowed.
587 # No file patterns are supported.
588 #
589 # The supported files are:
590 #
591 # Google Cloud Storage:
592 #
593 # storage.googleapis.com/{bucket}/{object}
594 # bucket.storage.googleapis.com/{object}
595 "A String",
596 ],
597 "type": "A String", # The type of Cloud Dataflow job.
598 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
599 # If this field is set, the service will ensure its uniqueness.
600 # The request to create a job will fail if the service has knowledge of a
601 # previously submitted job with the same client's ID and job name.
602 # The caller may use this field to ensure idempotence of job
603 # creation across retried attempts to create a job.
604 # By default, the field is empty and, in that case, the service ignores it.
605 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
606 # snapshot.
607 "stepsLocation": "A String", # The GCS location where the steps are stored.
608 "currentStateTime": "A String", # The timestamp associated with the current state.
609 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
610 # Flexible resource scheduling jobs are started with some delay after job
611 # creation, so start_time is unset before start and is updated when the
612 # job is started by the Cloud Dataflow service. For other jobs, start_time
613 # always equals to create_time and is immutable and set by the Cloud Dataflow
614 # service.
615 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
616 # Cloud Dataflow service.
617 "requestedState": "A String", # The job's requested state.
618 #
619 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
620 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
621 # also be used to directly set a job's requested state to
622 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
623 # job if it has not already reached a terminal state.
624 "name": "A String", # The user-specified Cloud Dataflow job name.
625 #
626 # Only one Job with a given name may exist in a project at any
627 # given time. If a caller attempts to create a Job with the same
628 # name as an already-existing Job, the attempt returns the
629 # existing Job.
630 #
631 # The name must match the regular expression
632 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
633 "steps": [ # Exactly one of step or steps_location should be specified.
634 #
635 # The top-level steps that constitute the entire job.
636 { # Defines a particular step within a Cloud Dataflow job.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400637 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700638 # A job consists of multiple steps, each of which performs some
639 # specific operation as part of the overall job. Data is typically
640 # passed from one step to another as part of the job.
641 #
642 # Here's an example of a sequence of steps which together implement a
643 # Map-Reduce job:
644 #
645 # * Read a collection of data from some source, parsing the
646 # collection's elements.
647 #
648 # * Validate the elements.
649 #
650 # * Apply a user-defined function to map each element to some value
651 # and extract an element-specific key value.
652 #
653 # * Group elements with the same key into a single element with
654 # that key, transforming a multiply-keyed collection into a
655 # uniquely-keyed collection.
656 #
657 # * Write the elements out to some data sink.
658 #
659 # Note that the Cloud Dataflow service may be used to run many different
660 # types of jobs, not just Map-Reduce.
661 "kind": "A String", # The kind of step in the Cloud Dataflow job.
662 "properties": { # Named properties associated with the step. Each kind of
663 # predefined step has its own required set of properties.
664 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
665 "a_key": "", # Properties of the object.
666 },
667 "name": "A String", # The name that identifies the step. This must be unique for each
668 # step with respect to all other steps in the Cloud Dataflow job.
669 },
670 ],
671 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
672 # of the job it replaced.
673 #
674 # When sending a `CreateJobRequest`, you can update a job by specifying it
675 # here. The job named here is stopped, and its intermediate state is
676 # transferred to this job.
677 "currentState": "A String", # The current state of the job.
678 #
679 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
680 # specified.
681 #
682 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
683 # terminal state. After a job has reached a terminal state, no
684 # further state updates may be made.
685 #
686 # This field may be mutated by the Cloud Dataflow service;
687 # callers cannot mutate it.
688 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
689 # isn't contained in the submitted job.
690 "stages": { # A mapping from each stage to the information about that stage.
691 "a_key": { # Contains information about how a particular
692 # google.dataflow.v1beta3.Step will be executed.
693 "stepName": [ # The steps associated with the execution stage.
694 # Note that stages may have several steps, and that a given step
695 # might be run by more than one stage.
696 "A String",
697 ],
698 },
699 },
700 },
701 }</pre>
702</div>
703
704<div class="method">
705 <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
706 <pre>Get the template associated with a template.
707
708Args:
709 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
710 location: string, The [regional endpoint]
711(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
712which to direct the request. (required)
713 gcsPath: string, Required. A Cloud Storage path to the template from which to
714create the job.
715Must be valid Cloud Storage URL, beginning with 'gs://'.
716 x__xgafv: string, V1 error format.
717 Allowed values
718 1 - v1 error format
719 2 - v2 error format
720 view: string, The view to retrieve. Defaults to METADATA_ONLY.
721
722Returns:
723 An object of the form:
724
725 { # The response to a GetTemplate request.
726 "status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
727 # request will be indicated in the error_details.
728 # different programming environments, including REST APIs and RPC APIs. It is
729 # used by [gRPC](https://github.com/grpc). The error model is designed to be:
730 #
731 # - Simple to use and understand for most users
732 # - Flexible enough to meet unexpected needs
733 #
734 # # Overview
735 #
736 # The `Status` message contains three pieces of data: error code, error
737 # message, and error details. The error code should be an enum value of
738 # google.rpc.Code, but it may accept additional error codes if needed. The
739 # error message should be a developer-facing English message that helps
740 # developers *understand* and *resolve* the error. If a localized user-facing
741 # error message is needed, put the localized message in the error details or
742 # localize it in the client. The optional error details may contain arbitrary
743 # information about the error. There is a predefined set of error detail types
744 # in the package `google.rpc` that can be used for common error conditions.
745 #
746 # # Language mapping
747 #
748 # The `Status` message is the logical representation of the error model, but it
749 # is not necessarily the actual wire format. When the `Status` message is
750 # exposed in different client libraries and different wire protocols, it can be
751 # mapped differently. For example, it will likely be mapped to some exceptions
752 # in Java, but more likely mapped to some error codes in C.
753 #
754 # # Other uses
755 #
756 # The error model and the `Status` message can be used in a variety of
757 # environments, either with or without APIs, to provide a
758 # consistent developer experience across different environments.
759 #
760 # Example uses of this error model include:
761 #
762 # - Partial errors. If a service needs to return partial errors to the client,
763 # it may embed the `Status` in the normal response to indicate the partial
764 # errors.
765 #
766 # - Workflow errors. A typical workflow has multiple steps. Each step may
767 # have a `Status` message for error reporting.
768 #
769 # - Batch operations. If a client uses batch request and batch response, the
770 # `Status` message should be used directly inside batch response, one for
771 # each error sub-response.
772 #
773 # - Asynchronous operations. If an API call embeds asynchronous operation
774 # results in its response, the status of those operations should be
775 # represented directly using the `Status` message.
776 #
777 # - Logging. If some API errors are stored in logs, the message `Status` could
778 # be used directly after any stripping needed for security/privacy reasons.
779 "message": "A String", # A developer-facing error message, which should be in English. Any
780 # user-facing error message should be localized and sent in the
781 # google.rpc.Status.details field, or localized by the client.
782 "code": 42, # The status code, which should be an enum value of google.rpc.Code.
783 "details": [ # A list of messages that carry the error details. There is a common set of
784 # message types for APIs to use.
785 {
786 "a_key": "", # Properties of the object. Contains field @type with type URL.
787 },
788 ],
789 },
790 "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
791 # parameters, etc.
792 "name": "A String", # Required. The name of the template.
793 "parameters": [ # The parameters for the template.
794 { # Metadata for a specific parameter.
795 "regexes": [ # Optional. Regexes that the parameter must match.
796 "A String",
797 ],
798 "helpText": "A String", # Required. The help text to display for the parameter.
799 "name": "A String", # Required. The name of the parameter.
800 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
801 "label": "A String", # Required. The label to display for the parameter.
802 },
803 ],
804 "description": "A String", # Optional. A description of the template.
805 },
806 }</pre>
807</div>
808
809<div class="method">
810 <code class="details" id="launch">launch(projectId, location, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</code>
811 <pre>Launch a template.
812
813Args:
814 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
815 location: string, The [regional endpoint]
816(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
817which to direct the request. (required)
818 body: object, The request body. (required)
819 The object takes the form of:
820
821{ # Parameters to provide to the template being launched.
822 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
823 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
824 # template if not specified.
825 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
826 # the service will use the network "default".
827 "zone": "A String", # The Compute Engine [availability
828 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
829 # for launching worker instances to run your pipeline.
830 "additionalUserLabels": { # Additional user labels to be specified for the job.
831 # Keys and values should follow the restrictions specified in the [labeling
832 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
833 # page.
834 "a_key": "A String",
835 },
836 "additionalExperiments": [ # Additional experiment flags for the job.
837 "A String",
838 ],
839 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
840 # Use with caution.
841 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
842 # Must be a valid Cloud Storage URL, beginning with `gs://`.
843 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
844 "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
845 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
846 # available to your pipeline during execution, from 1 to 1000.
847 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
848 # the form "regions/REGION/subnetworks/SUBNETWORK".
849 },
850 "parameters": { # The runtime parameters to pass to the job.
851 "a_key": "A String",
852 },
853 "jobName": "A String", # Required. The job name to use for the created job.
854 }
855
856 dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
857The file must be a Json serialized DynamicTemplateFieSpec object.
858 x__xgafv: string, V1 error format.
859 Allowed values
860 1 - v1 error format
861 2 - v2 error format
862 dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
863Must be a valid Cloud Storage URL, beginning with `gs://`.
864 gcsPath: string, A Cloud Storage path to the template from which to create
865the job.
866Must be valid Cloud Storage URL, beginning with 'gs://'.
867 validateOnly: boolean, If true, the request is validated but not actually executed.
868Defaults to false.
869
870Returns:
871 An object of the form:
872
873 { # Response to the request to launch a template.
874 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
875 # the job was successfully launched.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400876 "labels": { # User-defined labels for this job.
877 #
878 # The labels map can contain no more than 64 entries. Entries of the labels
879 # map are UTF8 strings that comply with the following restrictions:
880 #
881 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
882 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
883 # * Both keys and values are additionally constrained to be <= 128 bytes in
884 # size.
885 "a_key": "A String",
886 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700887 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
888 # by the metadata values provided here. Populated for ListJobs and all GetJob
889 # views SUMMARY and higher.
890 # ListJob response and Job SUMMARY view.
891 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
892 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
893 "version": "A String", # The version of the SDK used to run the job.
894 "sdkSupportStatus": "A String", # The support status for this SDK version.
895 },
896 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
897 { # Metadata for a PubSub connector used by the job.
898 "topic": "A String", # Topic accessed in the connection.
899 "subscription": "A String", # Subscription used in the connection.
900 },
901 ],
902 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
903 { # Metadata for a Datastore connector used by the job.
904 "projectId": "A String", # ProjectId accessed in the connection.
905 "namespace": "A String", # Namespace used in the connection.
906 },
907 ],
908 "fileDetails": [ # Identification of a File source used in the Dataflow job.
909 { # Metadata for a File connector used by the job.
910 "filePattern": "A String", # File Pattern used to access files by the connector.
911 },
912 ],
913 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
914 { # Metadata for a Spanner connector used by the job.
915 "instanceId": "A String", # InstanceId accessed in the connection.
916 "projectId": "A String", # ProjectId accessed in the connection.
917 "databaseId": "A String", # DatabaseId accessed in the connection.
918 },
919 ],
920 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
921 { # Metadata for a BigTable connector used by the job.
922 "instanceId": "A String", # InstanceId accessed in the connection.
923 "projectId": "A String", # ProjectId accessed in the connection.
924 "tableId": "A String", # TableId accessed in the connection.
925 },
926 ],
927 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
928 { # Metadata for a BigQuery connector used by the job.
929 "projectId": "A String", # Project accessed in the connection.
930 "dataset": "A String", # Dataset accessed in the connection.
931 "table": "A String", # Table accessed in the connection.
932 "query": "A String", # Query used to access data in the connection.
933 },
934 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400935 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700936 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
937 # A description of the user pipeline and stages through which it is executed.
938 # Created by Cloud Dataflow service. Only retrieved with
939 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
940 # form. This data is provided by the Dataflow service for ease of visualizing
941 # the pipeline and interpreting Dataflow provided metrics.
942 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
943 { # Description of the type, names/ids, and input/outputs for a transform.
944 "kind": "A String", # Type of transform.
945 "name": "A String", # User provided name for this transform instance.
946 "inputCollectionName": [ # User names for all collection inputs to this transform.
947 "A String",
948 ],
949 "displayData": [ # Transform-specific display data.
950 { # Data provided with a pipeline or transform to provide descriptive info.
951 "shortStrValue": "A String", # A possible additional shorter value to display.
952 # For example a java_class_name_value of com.mypackage.MyDoFn
953 # will be stored with MyDoFn as the short_str_value and
954 # com.mypackage.MyDoFn as the java_class_name value.
955 # short_str_value can be displayed and java_class_name_value
956 # will be displayed as a tooltip.
957 "durationValue": "A String", # Contains value if the data is of duration type.
958 "url": "A String", # An optional full URL.
959 "floatValue": 3.14, # Contains value if the data is of float type.
960 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
961 # language namespace (i.e. python module) which defines the display data.
962 # This allows a dax monitoring system to specially handle the data
963 # and perform custom rendering.
964 "javaClassValue": "A String", # Contains value if the data is of java class type.
965 "label": "A String", # An optional label to display in a dax UI for the element.
966 "boolValue": True or False, # Contains value if the data is of a boolean type.
967 "strValue": "A String", # Contains value if the data is of string type.
968 "key": "A String", # The key identifying the display data.
969 # This is intended to be used as a label for the display data
970 # when viewed in a dax monitoring system.
971 "int64Value": "A String", # Contains value if the data is of int64 type.
972 "timestampValue": "A String", # Contains value if the data is of timestamp type.
973 },
974 ],
975 "outputCollectionName": [ # User names for all collection outputs to this transform.
976 "A String",
977 ],
978 "id": "A String", # SDK generated id of this transform instance.
979 },
980 ],
981 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
982 { # Description of the composing transforms, names/ids, and input/outputs of a
983 # stage of execution. Some composing transforms and sources may have been
984 # generated by the Dataflow service during execution planning.
985 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
986 { # Description of an interstitial value between transforms in an execution
987 # stage.
988 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
989 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
990 # source is most closely associated.
991 "name": "A String", # Dataflow service generated name for this source.
992 },
993 ],
994 "kind": "A String", # Type of tranform this stage is executing.
995 "name": "A String", # Dataflow service generated name for this stage.
996 "outputSource": [ # Output sources for this stage.
997 { # Description of an input or output of an execution stage.
998 "userName": "A String", # Human-readable name for this source; may be user or system generated.
999 "sizeBytes": "A String", # Size of the source, if measurable.
1000 "name": "A String", # Dataflow service generated name for this source.
1001 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1002 # source is most closely associated.
1003 },
1004 ],
1005 "inputSource": [ # Input sources for this stage.
1006 { # Description of an input or output of an execution stage.
1007 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1008 "sizeBytes": "A String", # Size of the source, if measurable.
1009 "name": "A String", # Dataflow service generated name for this source.
1010 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1011 # source is most closely associated.
1012 },
1013 ],
1014 "componentTransform": [ # Transforms that comprise this execution stage.
1015 { # Description of a transform executed as part of an execution stage.
1016 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1017 "originalTransform": "A String", # User name for the original user transform with which this transform is
1018 # most closely associated.
1019 "name": "A String", # Dataflow service generated name for this source.
1020 },
1021 ],
1022 "id": "A String", # Dataflow service generated id for this stage.
1023 },
1024 ],
1025 "displayData": [ # Pipeline level display data.
1026 { # Data provided with a pipeline or transform to provide descriptive info.
1027 "shortStrValue": "A String", # A possible additional shorter value to display.
1028 # For example a java_class_name_value of com.mypackage.MyDoFn
1029 # will be stored with MyDoFn as the short_str_value and
1030 # com.mypackage.MyDoFn as the java_class_name value.
1031 # short_str_value can be displayed and java_class_name_value
1032 # will be displayed as a tooltip.
1033 "durationValue": "A String", # Contains value if the data is of duration type.
1034 "url": "A String", # An optional full URL.
1035 "floatValue": 3.14, # Contains value if the data is of float type.
1036 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1037 # language namespace (i.e. python module) which defines the display data.
1038 # This allows a dax monitoring system to specially handle the data
1039 # and perform custom rendering.
1040 "javaClassValue": "A String", # Contains value if the data is of java class type.
1041 "label": "A String", # An optional label to display in a dax UI for the element.
1042 "boolValue": True or False, # Contains value if the data is of a boolean type.
1043 "strValue": "A String", # Contains value if the data is of string type.
1044 "key": "A String", # The key identifying the display data.
1045 # This is intended to be used as a label for the display data
1046 # when viewed in a dax monitoring system.
1047 "int64Value": "A String", # Contains value if the data is of int64 type.
1048 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1049 },
1050 ],
1051 },
1052 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1053 # callers cannot mutate it.
1054 { # A message describing the state of a particular execution stage.
1055 "executionStageName": "A String", # The name of the execution stage.
1056 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1057 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1058 },
1059 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001060 "id": "A String", # The unique ID of this job.
1061 #
1062 # This field is set by the Cloud Dataflow service when the Job is
1063 # created, and is immutable for the life of the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001064 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1065 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1066 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1067 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1068 # corresponding name prefixes of the new job.
1069 "a_key": "A String",
1070 },
1071    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1072 "version": { # A structure describing which components and their versions of the service
1073 # are required in order to run the job.
1074 "a_key": "", # Properties of the object.
1075 },
1076      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1077 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1078 # at rest, AKA a Customer Managed Encryption Key (CMEK).
1079      #
1080      # Format:
1081 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1082      "internalExperiments": { # Experimental settings.
1083 "a_key": "", # Properties of the object. Contains field @type with type URL.
1084 },
1085 "dataset": "A String", # The dataset for the current project where various workflow
1086 # related tables are stored.
1087 #
1088 # The supported resource type is:
1089 #
1090 # Google BigQuery:
1091 # bigquery.googleapis.com/{dataset}
1092 "experiments": [ # The list of experiments to enable.
1093 "A String",
1094 ],
1095 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1096 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1097 # options are passed through the service and are used to recreate the
1098 # SDK pipeline options on the worker in a language agnostic and platform
1099 # independent way.
1100 "a_key": "", # Properties of the object.
1101 },
1102 "userAgent": { # A description of the process that generated the request.
1103 "a_key": "", # Properties of the object.
1104 },
1105 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1106 # unspecified, the service will attempt to choose a reasonable
1107 # default. This should be in the form of the API service name,
1108 # e.g. "compute.googleapis.com".
1109 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1110 # specified in order for the job to have workers.
1111 { # Describes one particular pool of Cloud Dataflow workers to be
1112 # instantiated by the Cloud Dataflow service in order to perform the
1113 # computations required by a job. Note that a workflow job may use
1114 # multiple pools, in order to match the various computational
1115 # requirements of the various stages of the job.
1116 "diskSourceImage": "A String", # Fully qualified source image for disks.
1117 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1118 # using the standard Dataflow task runner. Users should ignore
1119 # this field.
1120 "workflowFileName": "A String", # The file to store the workflow in.
1121 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1122 # will not be uploaded.
1123 #
1124 # The supported resource type is:
1125 #
1126 # Google Cloud Storage:
1127 # storage.googleapis.com/{bucket}/{object}
1128 # bucket.storage.googleapis.com/{object}
1129 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1130 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1131 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1132 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1133 # "shuffle/v1beta1".
1134 "workerId": "A String", # The ID of the worker running this pipeline.
1135 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1136 #
1137 # When workers access Google Cloud APIs, they logically do so via
1138 # relative URLs. If this field is specified, it supplies the base
1139 # URL to use for resolving these relative URLs. The normative
1140 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1141 # Locators".
1142 #
1143 # If not specified, the default value is "http://www.googleapis.com/"
1144 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1145 # "dataflow/v1b3/projects".
1146 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1147 # storage.
1148 #
1149 # The supported resource type is:
1150 #
1151 # Google Cloud Storage:
1152 #
1153 # storage.googleapis.com/{bucket}/{object}
1154 # bucket.storage.googleapis.com/{object}
1155 },
1156 "vmId": "A String", # The ID string of the VM.
1157 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1158 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1159 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1160 # access the Cloud Dataflow API.
1161 "A String",
1162 ],
1163 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1164 # taskrunner; e.g. "root".
1165 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1166 #
1167 # When workers access Google Cloud APIs, they logically do so via
1168 # relative URLs. If this field is specified, it supplies the base
1169 # URL to use for resolving these relative URLs. The normative
1170 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1171 # Locators".
1172 #
1173 # If not specified, the default value is "http://www.googleapis.com/"
1174 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1175 # taskrunner; e.g. "wheel".
1176 "languageHint": "A String", # The suggested backend language.
1177 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1178 # console.
1179 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1180 "logDir": "A String", # The directory on the VM to store logs.
1181          "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
1182 "harnessCommand": "A String", # The command to launch the worker harness.
1183 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1184 # temporary storage.
1185 #
1186 # The supported resource type is:
1187 #
1188 # Google Cloud Storage:
1189 # storage.googleapis.com/{bucket}/{object}
1190 # bucket.storage.googleapis.com/{object}
1191 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1192 },
1193 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1194 # are supported.
1195        "packages": [ # Packages to be installed on workers.
1196 { # The packages that must be installed in order for a worker to run the
1197 # steps of the Cloud Dataflow job that will be assigned to its worker
1198 # pool.
1199 #
1200 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1201 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1202 # might use this to install jars containing the user's code and all of the
1203 # various dependencies (libraries, data files, etc.) required in order
1204 # for that code to run.
1205 "location": "A String", # The resource to read the package from. The supported resource type is:
1206 #
1207 # Google Cloud Storage:
1208 #
1209 # storage.googleapis.com/{bucket}
1210 # bucket.storage.googleapis.com/
1211 "name": "A String", # The name of the package.
1212 },
1213 ],
1214        "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1215 # service will attempt to choose a reasonable default.
1216 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1217 # the service will use the network "default".
1218 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1219 # will attempt to choose a reasonable default.
1220 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1221 # attempt to choose a reasonable default.
1222        "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
1223 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1224 # `TEARDOWN_NEVER`.
1225 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1226 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1227 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1228 # down.
1229 #
1230 # If the workers are not torn down by the service, they will
1231 # continue to run and use Google Compute Engine VM resources in the
1232 # user's project until they are explicitly terminated by the user.
1233 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1234 # policy except for small, manually supervised test jobs.
1235 #
1236 # If unknown or unspecified, the service will attempt to choose a reasonable
1237 # default.
1238 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1239 # Compute Engine API.
1240 "ipConfiguration": "A String", # Configuration for VM IPs.
1241 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1242 # service will choose a number of threads (according to the number of cores
1243 # on the selected machine type for batch, or 1 by convention for streaming).
1244 "poolArgs": { # Extra arguments for this worker pool.
1245 "a_key": "", # Properties of the object. Contains field @type with type URL.
1246 },
1247 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1248 # execute the job. If zero or unspecified, the service will
1249 # attempt to choose a reasonable default.
1250 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1251 # harness, residing in Google Container Registry.
1252 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1253 # the form "regions/REGION/subnetworks/SUBNETWORK".
1254        "dataDisks": [ # Data disks that are used by a VM in this workflow.
1255 { # Describes the data disk used by a workflow job.
1256 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1257 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1258 # attempt to choose a reasonable default.
1259 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1260 # must be a disk type appropriate to the project and zone in which
1261 # the workers will run. If unknown or unspecified, the service
1262 # will attempt to choose a reasonable default.
1263          #
1264          # For example, the standard persistent disk type is a resource name
1265 # typically ending in "pd-standard". If SSD persistent disks are
1266 # available, the resource name typically ends with "pd-ssd". The
1267          # actual valid values are defined by the Google Compute Engine API,
1268 # not by the Cloud Dataflow API; consult the Google Compute Engine
1269 # documentation for more information about determining the set of
1270 # available disk types for a particular project and zone.
1271          #
1272          # Google Compute Engine Disk types are local to a particular
1273 # project in a particular zone, and so the resource name will
1274 # typically look something like this:
1275 #
1276 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1277          },
1278 ],
1279 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1280 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1281 "algorithm": "A String", # The algorithm to use for autoscaling.
1282 },
1283 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1284 # select a default set of packages which are useful to worker
1285 # harnesses written in a particular language.
1286 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1287 # attempt to choose a reasonable default.
1288 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1289 "a_key": "A String",
1290 },
1291 },
1292 ],
1293      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1294      # storage. The system will append the suffix "/temp-{JOBNAME}" to
1295 # this resource prefix, where {JOBNAME} is the value of the
1296 # job_name field. The resulting bucket and object prefix is used
1297 # as the prefix of the resources used to store temporary data
1298 # needed during the job execution. NOTE: This will override the
1299 # value in taskrunner_settings.
1300 # The supported resource type is:
1301 #
1302 # Google Cloud Storage:
1303 #
1304 # storage.googleapis.com/{bucket}/{object}
1305 # bucket.storage.googleapis.com/{object}
1306    },
1307    "location": "A String", # The [regional endpoint]
1308 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1309 # contains this job.
1310 "tempFiles": [ # A set of files the system should be aware of that are used
1311 # for temporary storage. These temporary files will be
1312 # removed on job completion.
1313 # No duplicates are allowed.
1314 # No file patterns are supported.
1315 #
1316 # The supported files are:
1317 #
1318 # Google Cloud Storage:
1319 #
1320 # storage.googleapis.com/{bucket}/{object}
1321 # bucket.storage.googleapis.com/{object}
1322 "A String",
1323 ],
1324 "type": "A String", # The type of Cloud Dataflow job.
1325 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1326 # If this field is set, the service will ensure its uniqueness.
1327 # The request to create a job will fail if the service has knowledge of a
1328 # previously submitted job with the same client's ID and job name.
1329 # The caller may use this field to ensure idempotence of job
1330 # creation across retried attempts to create a job.
1331 # By default, the field is empty and, in that case, the service ignores it.
1332 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1333 # snapshot.
1334 "stepsLocation": "A String", # The GCS location where the steps are stored.
1335 "currentStateTime": "A String", # The timestamp associated with the current state.
1336 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1337 # Flexible resource scheduling jobs are started with some delay after job
1338 # creation, so start_time is unset before start and is updated when the
1339 # job is started by the Cloud Dataflow service. For other jobs, start_time
1340    # always equals create_time and is immutable and set by the Cloud Dataflow
1341 # service.
1342 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1343 # Cloud Dataflow service.
1344 "requestedState": "A String", # The job's requested state.
1345 #
1346 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1347 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1348 # also be used to directly set a job's requested state to
1349 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1350 # job if it has not already reached a terminal state.
1351 "name": "A String", # The user-specified Cloud Dataflow job name.
1352 #
1353 # Only one Job with a given name may exist in a project at any
1354 # given time. If a caller attempts to create a Job with the same
1355 # name as an already-existing Job, the attempt returns the
1356 # existing Job.
1357 #
1358 # The name must match the regular expression
1359 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1360 "steps": [ # Exactly one of step or steps_location should be specified.
1361 #
1362 # The top-level steps that constitute the entire job.
1363      { # Defines a particular step within a Cloud Dataflow job.
1364 #
1365 # A job consists of multiple steps, each of which performs some
1366 # specific operation as part of the overall job. Data is typically
1367 # passed from one step to another as part of the job.
1368 #
1369 # Here's an example of a sequence of steps which together implement a
1370 # Map-Reduce job:
1371 #
1372 # * Read a collection of data from some source, parsing the
1373 # collection's elements.
1374 #
1375 # * Validate the elements.
1376 #
1377 # * Apply a user-defined function to map each element to some value
1378 # and extract an element-specific key value.
1379 #
1380 # * Group elements with the same key into a single element with
1381 # that key, transforming a multiply-keyed collection into a
1382 # uniquely-keyed collection.
1383 #
1384 # * Write the elements out to some data sink.
1385 #
1386 # Note that the Cloud Dataflow service may be used to run many different
1387 # types of jobs, not just Map-Reduce.
1388 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1389 "properties": { # Named properties associated with the step. Each kind of
1390 # predefined step has its own required set of properties.
1391 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1392 "a_key": "", # Properties of the object.
1393 },
1394 "name": "A String", # The name that identifies the step. This must be unique for each
1395 # step with respect to all other steps in the Cloud Dataflow job.
1396 },
1397 ],
1398    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1399 # of the job it replaced.
1400 #
1401 # When sending a `CreateJobRequest`, you can update a job by specifying it
1402 # here. The job named here is stopped, and its intermediate state is
1403 # transferred to this job.
1404    "currentState": "A String", # The current state of the job.
1405 #
1406 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1407 # specified.
1408 #
1409 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1410 # terminal state. After a job has reached a terminal state, no
1411 # further state updates may be made.
1412 #
1413 # This field may be mutated by the Cloud Dataflow service;
1414 # callers cannot mutate it.
1415    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1416 # isn't contained in the submitted job.
1417 "stages": { # A mapping from each stage to the information about that stage.
1418 "a_key": { # Contains information about how a particular
1419 # google.dataflow.v1beta3.Step will be executed.
1420 "stepName": [ # The steps associated with the execution stage.
1421 # Note that stages may have several steps, and that a given step
1422 # might be run by more than one stage.
1423 "A String",
1424 ],
1425 },
1426 },
1427 },
1428  },
1429 }</pre>
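<p>As a usage illustration only: the sketch below calls this method through the google-api-python-client
library and reads two of the documented fields from the returned job. It assumes Application Default
Credentials are configured; the project ID, region, Cloud Storage paths, job name, and template
parameter keys are hypothetical placeholders, and the accepted parameters and environment options
depend on the template being launched.</p>
<pre>
# Minimal sketch, not an official sample. Assumes google-api-python-client is
# installed and Application Default Credentials are available.
from googleapiclient.discovery import build

# Build a client for the Dataflow v1b3 API.
service = build("dataflow", "v1b3")

project_id = "my-project"      # placeholder project ID
location = "us-central1"       # placeholder regional endpoint

# Launch parameters: the keys under "parameters" are defined by the template
# itself, and "environment" takes runtime environment fields (the names and
# values below are illustrative placeholders).
body = {
    "jobName": "example-template-job",
    "parameters": {"inputFile": "gs://my-bucket/input.txt"},
    "environment": {
        "tempLocation": "gs://my-bucket/temp",
        "maxWorkers": 3,
    },
}

response = service.projects().locations().templates().launch(
    projectId=project_id,
    location=location,
    gcsPath="gs://my-bucket/templates/my-template",  # placeholder template path
    body=body,
).execute()

# The response wraps the launched job resource described above; for example,
# its service-generated ID and current state can be read directly.
job = response.get("job", {})
print(job.get("id"), job.get("currentState"))
</pre>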
1430</div>
1431
1432</body></html>