<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#create">create(projectId, location, body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Get the template associated with a template path.</p>
<p class="toc_element">
  <code><a href="#launch">launch(projectId, location, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</a></code></p>
<p class="firstline">Launch a template.</p>
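<p>For orientation, a minimal usage sketch (not part of the generated reference): obtaining the <code>templates</code> resource with the Python API client. It assumes google-api-python-client is installed and application-default credentials are configured.</p>
<pre>
# Minimal sketch: build the Dataflow service and navigate to this resource.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
templates = service.projects().locations().templates()
</pre>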
86<h3>Method Details</h3>
87<div class="method">
Dan O'Mearadd494642020-05-01 07:42:23 -070088 <code class="details" id="create">create(projectId, location, body=None, x__xgafv=None)</code>
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040089 <pre>Creates a Cloud Dataflow job from a template.
90
91Args:
92 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070093 location: string, The [regional endpoint]
94(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
95which to direct the request. (required)
Dan O'Mearadd494642020-05-01 07:42:23 -070096 body: object, The request body.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040097 The object takes the form of:
98
{ # A request to create a Cloud Dataflow job from a template.
  "environment": { # The environment values to set at runtime. # The runtime environment for the job.
    "workerRegion": "A String", # The Compute Engine region
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1". Mutually exclusive
        # with worker_zone. If neither worker_region nor worker_zone is specified,
        # default to the control plane's region.
    "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
        # template if not specified.
    "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
        # the service will use the network "default".
    "zone": "A String", # The Compute Engine [availability
        # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
        # for launching worker instances to run your pipeline.
        # In the future, worker_zone will take precedence.
    "additionalUserLabels": { # Additional user labels to be specified for the job.
        # Keys and values should follow the restrictions specified in the [labeling
        # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
        # page.
      "a_key": "A String",
    },
    "numWorkers": 42, # The initial number of Google Compute Engine instances for the job.
    "additionalExperiments": [ # Additional experiment flags for the job.
      "A String",
    ],
    "ipConfiguration": "A String", # Configuration for VM IPs.
    "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
        # Use with caution.
    "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
        # Must be a valid Cloud Storage URL, beginning with `gs://`.
    "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
    "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.
        # Key format is:
        # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
    "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
        # available to your pipeline during execution, from 1 to 1000.
    "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
        # the form "regions/REGION/subnetworks/SUBNETWORK".
    "workerZone": "A String", # The Compute Engine zone
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
        # with worker_region. If neither worker_region nor worker_zone is specified,
        # a zone in the control plane's region is chosen based on available capacity.
        # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
  },
  "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
      # create the job.
      # Must be a valid Cloud Storage URL, beginning with `gs://`.
  "location": "A String", # The [regional endpoint]
      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
      # which to direct the request.
  "parameters": { # The runtime parameters to pass to the job.
    "a_key": "A String",
  },
  "jobName": "A String", # Required. The job name to use for the created job.
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the
          # ListJob response and Job SUMMARY view. # This field is populated by the
          # Dataflow service to support filtering jobs by the metadata values provided
          # here. Populated for ListJobs and all GetJob views SUMMARY and higher.
        "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
          "versionDisplayName": "A String", # A readable string describing the version of the SDK.
          "version": "A String", # The version of the SDK used to run the job.
          "sdkSupportStatus": "A String", # The support status for this SDK version.
        },
        "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
          { # Metadata for a PubSub connector used by the job.
            "topic": "A String", # Topic accessed in the connection.
            "subscription": "A String", # Subscription used in the connection.
          },
        ],
        "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
          { # Metadata for a Datastore connector used by the job.
            "projectId": "A String", # ProjectId accessed in the connection.
            "namespace": "A String", # Namespace used in the connection.
          },
        ],
        "fileDetails": [ # Identification of a File source used in the Dataflow job.
          { # Metadata for a File connector used by the job.
            "filePattern": "A String", # File Pattern used to access files by the connector.
          },
        ],
        "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
          { # Metadata for a Spanner connector used by the job.
            "instanceId": "A String", # InstanceId accessed in the connection.
            "projectId": "A String", # ProjectId accessed in the connection.
            "databaseId": "A String", # DatabaseId accessed in the connection.
          },
        ],
        "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
          { # Metadata for a BigTable connector used by the job.
            "instanceId": "A String", # InstanceId accessed in the connection.
            "projectId": "A String", # ProjectId accessed in the connection.
            "tableId": "A String", # TableId accessed in the connection.
          },
        ],
        "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
          { # Metadata for a BigQuery connector used by the job.
            "projectId": "A String", # Project accessed in the connection.
            "query": "A String", # Query used to access data in the connection.
            "table": "A String", # Table accessed in the connection.
            "dataset": "A String", # Dataset accessed in the connection.
          },
        ],
      },
      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
          # form. This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics. # Preliminary field:
          # The format of this data may change at any time. A description of the user
          # pipeline and stages through which it is executed. Created by Cloud Dataflow
          # service. Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (i.e. python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "int64Value": "A String", # Contains value if the data is of int64 type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK generated id of this transform instance.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution. Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
        "displayData": [ # Pipeline level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "int64Value": "A String", # Contains value if the data is of int64 type.
          },
        ],
      },
      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
        { # A message describing the state of a particular execution stage.
          "executionStageName": "A String", # The name of the execution stage.
          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
        },
      ],
349 "id": "A String", # The unique ID of this job.
350 #
351 # This field is set by the Cloud Dataflow service when the Job is
352 # created, and is immutable for the life of the job.
353 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
354 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
355 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
356 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
357 # corresponding name prefixes of the new job.
358 "a_key": "A String",
359 },
360 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700361 "workerRegion": "A String", # The Compute Engine region
362 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
363 # which worker processing should occur, e.g. "us-west1". Mutually exclusive
364 # with worker_zone. If neither worker_region nor worker_zone is specified,
365 # default to the control plane's region.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700366 "version": { # A structure describing which components and their versions of the service
367 # are required in order to run the job.
368 "a_key": "", # Properties of the object.
369 },
370 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
371 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
372 # at rest, AKA a Customer Managed Encryption Key (CMEK).
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400373 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700374 # Format:
375 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
376 "internalExperiments": { # Experimental settings.
377 "a_key": "", # Properties of the object. Contains field @type with type URL.
378 },
379 "dataset": "A String", # The dataset for the current project where various workflow
380 # related tables are stored.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400381 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700382 # The supported resource type is:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400383 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700384 # Google BigQuery:
385 # bigquery.googleapis.com/{dataset}
386 "experiments": [ # The list of experiments to enable.
387 "A String",
388 ],
389 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
390 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
391 # options are passed through the service and are used to recreate the
392 # SDK pipeline options on the worker in a language agnostic and platform
393 # independent way.
394 "a_key": "", # Properties of the object.
395 },
396 "userAgent": { # A description of the process that generated the request.
397 "a_key": "", # Properties of the object.
398 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700399 "workerZone": "A String", # The Compute Engine zone
400 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
401 # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
402 # with worker_region. If neither worker_region nor worker_zone is specified,
403 # a zone in the control plane's region is chosen based on available capacity.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700404 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
405 # specified in order for the job to have workers.
406 { # Describes one particular pool of Cloud Dataflow workers to be
407 # instantiated by the Cloud Dataflow service in order to perform the
408 # computations required by a job. Note that a workflow job may use
409 # multiple pools, in order to match the various computational
410 # requirements of the various stages of the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700411 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
412 # harness, residing in Google Container Registry.
413 #
414 # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
415 "ipConfiguration": "A String", # Configuration for VM IPs.
416 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
417 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
418 "algorithm": "A String", # The algorithm to use for autoscaling.
419 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700420 "diskSourceImage": "A String", # Fully qualified source image for disks.
Dan O'Mearadd494642020-05-01 07:42:23 -0700421 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
422 # the service will use the network "default".
423 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
424 # will attempt to choose a reasonable default.
425 "metadata": { # Metadata to set on the Google Compute Engine VMs.
426 "a_key": "A String",
427 },
428 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
429 # service will attempt to choose a reasonable default.
430 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
431 # Compute Engine API.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700432 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
433 # using the standard Dataflow task runner. Users should ignore
434 # this field.
435 "workflowFileName": "A String", # The file to store the workflow in.
436 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
437 # will not be uploaded.
438 #
439 # The supported resource type is:
440 #
441 # Google Cloud Storage:
442 # storage.googleapis.com/{bucket}/{object}
443 # bucket.storage.googleapis.com/{object}
444 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
Dan O'Mearadd494642020-05-01 07:42:23 -0700445 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
446 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
447 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
448 "vmId": "A String", # The ID string of the VM.
449 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
450 # taskrunner; e.g. "wheel".
451 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
452 # taskrunner; e.g. "root".
453 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
454 # access the Cloud Dataflow API.
455 "A String",
456 ],
457 "languageHint": "A String", # The suggested backend language.
458 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
459 # console.
460 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
461 "logDir": "A String", # The directory on the VM to store logs.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700462 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
463 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
464 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
465 # "shuffle/v1beta1".
466 "workerId": "A String", # The ID of the worker running this pipeline.
467 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
468 #
469 # When workers access Google Cloud APIs, they logically do so via
470 # relative URLs. If this field is specified, it supplies the base
471 # URL to use for resolving these relative URLs. The normative
472 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
473 # Locators".
474 #
475 # If not specified, the default value is "http://www.googleapis.com/"
476 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
477 # "dataflow/v1b3/projects".
478 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
479 # storage.
480 #
481 # The supported resource type is:
482 #
483 # Google Cloud Storage:
484 #
485 # storage.googleapis.com/{bucket}/{object}
486 # bucket.storage.googleapis.com/{object}
487 },
              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  # storage.googleapis.com/{bucket}/{object}
                  # bucket.storage.googleapis.com/{object}
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
            },
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    # storage.googleapis.com/{bucket}
                    # bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run. If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard". If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine Disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
                # only be set in the Fn API path. For non-cross-language pipelines this
                # should have only one entry. Cross-language pipelines will have two or more
                # entries.
              { # Defines a SDK harness container for executing Dataflow pipelines.
                "containerImage": "A String", # A docker container image that resides in Google Container Registry.
                "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
                    # container instance with this image. If false (or unset) recommends using
                    # more than one core per SDK container instance with this image for
                    # efficiency. Note that Dataflow service may choose to override this property
                    # if needed.
              },
            ],
          },
        ],
        "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default. This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field. The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution. NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            # storage.googleapis.com/{bucket}/{object}
            # bucket.storage.googleapis.com/{object}
      },
      "location": "A String", # The [regional endpoint]
          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
          # contains this job.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          # storage.googleapis.com/{bucket}/{object}
          # bucket.storage.googleapis.com/{object}
        "A String",
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
          # snapshot.
      "stepsLocation": "A String", # The GCS location where the steps are stored.
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
          # Flexible resource scheduling jobs are started with some delay after job
          # creation, so start_time is unset before start and is updated when the
          # job is started by the Cloud Dataflow service. For other jobs, start_time
          # always equals to create_time and is immutable and set by the Cloud Dataflow
          # service.
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "steps": [ # Exactly one of step or steps_location should be specified.
          #
          # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job. Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            # * Read a collection of data from some source, parsing the
            #   collection's elements.
            #
            # * Validate the elements.
            #
            # * Apply a user-defined function to map each element to some value
            #   and extract an element-specific key value.
            #
            # * Group elements with the same key into a single element with
            #   that key, transforming a multiply-keyed collection into a
            #   uniquely-keyed collection.
            #
            # * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
        },
      ],
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that
          # isn't contained in the submitted job. # Deprecated.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    }</pre>
743</div>

<div class="method">
    <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
  <pre>Get the template associated with a template path.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
which to direct the request. (required)
  gcsPath: string, Required. A Cloud Storage path to the template from which to
create the job.
Must be a valid Cloud Storage URL, beginning with 'gs://'.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The view to retrieve. Defaults to METADATA_ONLY.

Returns:
  An object of the form:

    { # The response to a GetTemplate request.
      "status": { # The `Status` type defines a logical error model that is suitable for
          # different programming environments, including REST APIs and RPC APIs. It is
          # used by [gRPC](https://github.com/grpc). Each `Status` message contains
          # three pieces of data: error code, error message, and error details.
          #
          # You can find out more about this error model and how to work with it in the
          # [API Design Guide](https://cloud.google.com/apis/design/errors). # The status of the get template request. Any problems with the
          # request will be indicated in the error_details.
        "message": "A String", # A developer-facing error message, which should be in English. Any
            # user-facing error message should be localized and sent in the
            # google.rpc.Status.details field, or localized by the client.
        "code": 42, # The status code, which should be an enum value of google.rpc.Code.
        "details": [ # A list of messages that carry the error details. There is a common set of
            # message types for APIs to use.
          {
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
        ],
      },
      "templateType": "A String", # Template Type.
      "runtimeMetadata": { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters.
        "parameters": [ # The parameters for the template.
          { # Metadata for a specific parameter.
            "name": "A String", # Required. The name of the parameter.
            "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
            "regexes": [ # Optional. Regexes that the parameter must match.
              "A String",
            ],
            "label": "A String", # Required. The label to display for the parameter.
            "helpText": "A String", # Required. The help text to display for the parameter.
            "paramType": "A String", # Optional. The type of the parameter.
                # Used for selecting input picker.
          },
        ],
        "sdkInfo": { # SDK Information. # SDK Info for the template.
          "version": "A String", # Optional. The SDK version.
          "language": "A String", # Required. The SDK Language.
        },
      },
      "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
          # parameters, etc.
        "name": "A String", # Required. The name of the template.
        "parameters": [ # The parameters for the template.
          { # Metadata for a specific parameter.
            "name": "A String", # Required. The name of the parameter.
            "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
            "regexes": [ # Optional. Regexes that the parameter must match.
              "A String",
            ],
            "label": "A String", # Required. The label to display for the parameter.
            "helpText": "A String", # Required. The help text to display for the parameter.
            "paramType": "A String", # Optional. The type of the parameter.
                # Used for selecting input picker.
          },
        ],
        "description": "A String", # Optional. A description of the template.
      },
    }</pre>
</div>
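<p>A corresponding sketch for <code>get</code>, with the same placeholder values: it retrieves the template's declared metadata (name, parameters, and so on) without creating a job.</p>
<pre>
# Inspect a template's declared parameters before launching it.
template = templates.get(
    projectId='my-project',
    location='us-central1',
    gcsPath='gs://dataflow-templates/latest/Word_Count',
    view='METADATA_ONLY').execute()
for param in template.get('metadata', {}).get('parameters', []):
    print(param['name'], '-', param.get('helpText', ''))
</pre>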

<div class="method">
    <code class="details" id="launch">launch(projectId, location, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</code>
  <pre>Launch a template.

Args:
  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The [regional endpoint]
(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
which to direct the request. (required)
  body: object, The request body.
    The object takes the form of:

{ # Parameters to provide to the template being launched.
  "environment": { # The environment values to set at runtime. # The runtime environment for the job.
    "workerRegion": "A String", # The Compute Engine region
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1". Mutually exclusive
        # with worker_zone. If neither worker_region nor worker_zone is specified,
        # default to the control plane's region.
    "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
        # template if not specified.
    "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
        # the service will use the network "default".
    "zone": "A String", # The Compute Engine [availability
        # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
        # for launching worker instances to run your pipeline.
        # In the future, worker_zone will take precedence.
    "additionalUserLabels": { # Additional user labels to be specified for the job.
        # Keys and values should follow the restrictions specified in the [labeling
        # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
        # page.
      "a_key": "A String",
    },
    "numWorkers": 42, # The initial number of Google Compute Engine instances for the job.
    "additionalExperiments": [ # Additional experiment flags for the job.
      "A String",
    ],
    "ipConfiguration": "A String", # Configuration for VM IPs.
    "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
        # Use with caution.
    "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
        # Must be a valid Cloud Storage URL, beginning with `gs://`.
    "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
    "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.
        # Key format is:
        # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
    "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
        # available to your pipeline during execution, from 1 to 1000.
    "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
        # the form "regions/REGION/subnetworks/SUBNETWORK".
    "workerZone": "A String", # The Compute Engine zone
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
        # with worker_region. If neither worker_region nor worker_zone is specified,
        # a zone in the control plane's region is chosen based on available capacity.
        # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
  },
  "transformNameMapping": { # Only applicable when updating a pipeline. Map of transform name prefixes of
      # the job to be replaced to the corresponding name prefixes of the new job.
    "a_key": "A String",
  },
  "update": True or False, # If set, replace the existing pipeline with the name specified by jobName
      # with this pipeline, preserving state.
  "parameters": { # The runtime parameters to pass to the job.
    "a_key": "A String",
  },
  "jobName": "A String", # Required. The job name to use for the created job.
}

  dynamicTemplate_gcsPath: string, Path to the dynamic template spec file on GCS.
The file must be a JSON-serialized DynamicTemplateFieSpec object.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
Must be a valid Cloud Storage URL, beginning with `gs://`.
  gcsPath: string, A Cloud Storage path to the template from which to create
the job.
Must be a valid Cloud Storage URL, beginning with 'gs://'.
  validateOnly: boolean, If true, the request is validated but not actually executed.
Defaults to false.

Returns:
  An object of the form:

913 { # Response to the request to launch a template.
914 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
915 # the job was successfully launched.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400916 "labels": { # User-defined labels for this job.
917 #
918 # The labels map can contain no more than 64 entries. Entries of the labels
919 # map are UTF8 strings that comply with the following restrictions:
920 #
921 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
922 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
Dan O'Mearadd494642020-05-01 07:42:23 -0700923 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400924 # size.
925 "a_key": "A String",
926 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700927 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
928 # by the metadata values provided here. Populated for ListJobs and all GetJob
929 # views SUMMARY and higher.
930 # ListJob response and Job SUMMARY view.
931 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
932 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
933 "version": "A String", # The version of the SDK used to run the job.
934 "sdkSupportStatus": "A String", # The support status for this SDK version.
935 },
936 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
937 { # Metadata for a PubSub connector used by the job.
938 "topic": "A String", # Topic accessed in the connection.
939 "subscription": "A String", # Subscription used in the connection.
940 },
941 ],
942 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
943 { # Metadata for a Datastore connector used by the job.
944 "projectId": "A String", # ProjectId accessed in the connection.
945 "namespace": "A String", # Namespace used in the connection.
946 },
947 ],
948 "fileDetails": [ # Identification of a File source used in the Dataflow job.
949 { # Metadata for a File connector used by the job.
950 "filePattern": "A String", # File Pattern used to access files by the connector.
951 },
952 ],
953 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
954 { # Metadata for a Spanner connector used by the job.
955 "instanceId": "A String", # InstanceId accessed in the connection.
956 "projectId": "A String", # ProjectId accessed in the connection.
957 "databaseId": "A String", # DatabaseId accessed in the connection.
958 },
959 ],
960 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
961 { # Metadata for a BigTable connector used by the job.
962 "instanceId": "A String", # InstanceId accessed in the connection.
963 "projectId": "A String", # ProjectId accessed in the connection.
964 "tableId": "A String", # TableId accessed in the connection.
965 },
966 ],
967 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
968 { # Metadata for a BigQuery connector used by the job.
969 "projectId": "A String", # Project accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700970 "query": "A String", # Query used to access data in the connection.
Dan O'Mearadd494642020-05-01 07:42:23 -0700971 "table": "A String", # Table accessed in the connection.
972 "dataset": "A String", # Dataset accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700973 },
974 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400975 },
  "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
      # A description of the user pipeline and stages through which it is executed.
      # Created by Cloud Dataflow service. Only retrieved with
      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      # A descriptive representation of the submitted pipeline as well as the
      # executed form. This data is provided by the Dataflow service for ease of
      # visualizing the pipeline and interpreting Dataflow-provided metrics.
    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
      { # Description of the type, names/ids, and input/outputs for a transform.
        "kind": "A String", # Type of transform.
        "name": "A String", # User provided name for this transform instance.
        "inputCollectionName": [ # User names for all collection inputs to this transform.
          "A String",
        ],
        "displayData": [ # Transform-specific display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example, a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (e.g. a Python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "int64Value": "A String", # Contains value if the data is of int64 type.
          },
        ],
        "outputCollectionName": [ # User names for all collection outputs to this transform.
          "A String",
        ],
        "id": "A String", # SDK generated id of this transform instance.
      },
    ],
    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
      { # Description of the composing transforms, names/ids, and input/outputs of a
          # stage of execution. Some composing transforms and sources may have been
          # generated by the Dataflow service during execution planning.
        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
          { # Description of an interstitial value between transforms in an execution
              # stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
            "name": "A String", # Dataflow service generated name for this source.
          },
        ],
        "kind": "A String", # Type of transform this stage is executing.
        "name": "A String", # Dataflow service generated name for this stage.
        "outputSource": [ # Output sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "inputSource": [ # Input sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "componentTransform": [ # Transforms that comprise this execution stage.
          { # Description of a transform executed as part of an execution stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransform": "A String", # User name for the original user transform with which this transform is
                # most closely associated.
            "name": "A String", # Dataflow service generated name for this transform.
          },
        ],
        "id": "A String", # Dataflow service generated id for this stage.
      },
    ],
    "displayData": [ # Pipeline level display data.
      { # Data provided with a pipeline or transform to provide descriptive info.
        "key": "A String", # The key identifying the display data.
            # This is intended to be used as a label for the display data
            # when viewed in a dax monitoring system.
        "shortStrValue": "A String", # A possible additional shorter value to display.
            # For example, a java_class_name_value of com.mypackage.MyDoFn
            # will be stored with MyDoFn as the short_str_value and
            # com.mypackage.MyDoFn as the java_class_name value.
            # short_str_value can be displayed and java_class_name_value
            # will be displayed as a tooltip.
        "timestampValue": "A String", # Contains value if the data is of timestamp type.
        "url": "A String", # An optional full URL.
        "floatValue": 3.14, # Contains value if the data is of float type.
        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
            # language namespace (e.g. a Python module) which defines the display data.
            # This allows a dax monitoring system to specially handle the data
            # and perform custom rendering.
        "javaClassValue": "A String", # Contains value if the data is of java class type.
        "label": "A String", # An optional label to display in a dax UI for the element.
        "boolValue": True or False, # Contains value if the data is of a boolean type.
        "strValue": "A String", # Contains value if the data is of string type.
        "durationValue": "A String", # Contains value if the data is of duration type.
        "int64Value": "A String", # Contains value if the data is of int64 type.
      },
    ],
  },
1092 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1093 # callers cannot mutate it.
1094 { # A message describing the state of a particular execution stage.
1095 "executionStageName": "A String", # The name of the execution stage.
1096 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1097 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1098 },
1099 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001100 "id": "A String", # The unique ID of this job.
1101 #
1102 # This field is set by the Cloud Dataflow service when the Job is
1103 # created, and is immutable for the life of the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001104 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1105 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1106 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1107 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1108 # corresponding name prefixes of the new job.
1109 "a_key": "A String",
1110 },
  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    "workerRegion": "A String", # The Compute Engine region
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1". Mutually exclusive
        # with worker_zone. If neither worker_region nor worker_zone is specified,
        # default to the control plane's region.
    "version": { # A structure describing which components and their versions of the service
        # are required in order to run the job.
      "a_key": "", # Properties of the object.
    },
    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
        # at rest, also known as a Customer Managed Encryption Key (CMEK).
        #
        # Format:
        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
    "internalExperiments": { # Experimental settings.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "dataset": "A String", # The dataset for the current project where various workflow
        # related tables are stored.
        #
        # The supported resource type is:
        #
        # Google BigQuery:
        #   bigquery.googleapis.com/{dataset}
    "experiments": [ # The list of experiments to enable.
      "A String",
    ],
    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
        # options are passed through the service and are used to recreate the
        # SDK pipeline options on the worker in a language agnostic and platform
        # independent way.
      "a_key": "", # Properties of the object.
    },
    "userAgent": { # A description of the process that generated the request.
      "a_key": "", # Properties of the object.
    },
    "workerZone": "A String", # The Compute Engine zone
        # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
        # with worker_region. If neither worker_region nor worker_zone is specified,
        # a zone in the control plane's region is chosen based on available capacity.
    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
        # specified in order for the job to have workers.
      { # Describes one particular pool of Cloud Dataflow workers to be
          # instantiated by the Cloud Dataflow service in order to perform the
          # computations required by a job. Note that a workflow job may use
          # multiple pools, in order to match the various computational
          # requirements of the various stages of the job.
        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
            # harness, residing in Google Container Registry.
            #
            # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
        "ipConfiguration": "A String", # Configuration for VM IPs.
        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          "algorithm": "A String", # The algorithm to use for autoscaling.
        },
        "diskSourceImage": "A String", # Fully qualified source image for disks.
        "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
            # the service will use the network "default".
        "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
            # will attempt to choose a reasonable default.
        "metadata": { # Metadata to set on the Google Compute Engine VMs.
          "a_key": "A String",
        },
        "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
            # service will attempt to choose a reasonable default.
        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
            # Compute Engine API.
        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
            # using the standard Dataflow task runner. Users should ignore
            # this field.
          "workflowFileName": "A String", # The file to store the workflow in.
          "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
              # will not be uploaded.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
          "vmId": "A String", # The ID string of the VM.
          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "wheel".
          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "root".
          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
              # access the Cloud Dataflow API.
            "A String",
          ],
          "languageHint": "A String", # The suggested backend language.
          "logToSerialconsole": True or False, # Whether to send taskrunner log info to the Google Compute Engine VM
              # serial console.
          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
          "logDir": "A String", # The directory on the VM to store logs.
          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                # "shuffle/v1beta1".
            "workerId": "A String", # The ID of the worker running this pipeline.
            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                # "dataflow/v1b3/projects".
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
          "harnessCommand": "A String", # The command to launch the worker harness.
          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
              # temporary storage.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
              #
              # When workers access Google Cloud APIs, they logically do so via
              # relative URLs. If this field is specified, it supplies the base
              # URL to use for resolving these relative URLs. The normative
              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
              # Locators".
              #
              # If not specified, the default value is "http://www.googleapis.com/"
        },
        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
            # service will choose a number of threads (according to the number of cores
            # on the selected machine type for batch, or 1 by convention for streaming).
        "poolArgs": { # Extra arguments for this worker pool.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "packages": [ # Packages to be installed on workers.
          { # The packages that must be installed in order for a worker to run the
              # steps of the Cloud Dataflow job that will be assigned to its worker
              # pool.
              #
              # This is the mechanism by which the Cloud Dataflow SDK causes code to
              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
              # might use this to install jars containing the user's code and all of the
              # various dependencies (libraries, data files, etc.) required in order
              # for that code to run.
            "location": "A String", # The resource to read the package from. The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}
                #   bucket.storage.googleapis.com/
            "name": "A String", # The name of the package.
          },
        ],
        "defaultPackageSet": "A String", # The default package set to install. This allows the service to
            # select a default set of packages which are useful to worker
            # harnesses written in a particular language.
        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
            # are supported.
        "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
            # attempt to choose a reasonable default.
        "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
            # `TEARDOWN_NEVER`.
            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
            # down.
            #
            # If the workers are not torn down by the service, they will
            # continue to run and use Google Compute Engine VM resources in the
            # user's project until they are explicitly terminated by the user.
            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
            # policy except for small, manually supervised test jobs.
            #
            # If unknown or unspecified, the service will attempt to choose a reasonable
            # default.
        "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
            # execute the job. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
            # the form "regions/REGION/subnetworks/SUBNETWORK".
        "dataDisks": [ # Data disks that are used by a VM in this workflow.
          { # Describes the data disk used by a workflow job.
            "mountPoint": "A String", # Directory in a VM where disk is mounted.
            "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                # must be a disk type appropriate to the project and zone in which
                # the workers will run. If unknown or unspecified, the service
                # will attempt to choose a reasonable default.
                #
                # For example, the standard persistent disk type is a resource name
                # typically ending in "pd-standard". If SSD persistent disks are
                # available, the resource name typically ends with "pd-ssd". The
                # actual valid values are defined by the Google Compute Engine API,
                # not by the Cloud Dataflow API; consult the Google Compute Engine
                # documentation for more information about determining the set of
                # available disk types for a particular project and zone.
                #
                # Google Compute Engine disk types are local to a particular
                # project in a particular zone, and so the resource name will
                # typically look something like this:
                #
                #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
          },
        ],
        "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
            # only be set in the Fn API path. For non-cross-language pipelines this
            # should have only one entry. Cross-language pipelines will have two or more
            # entries.
          { # Defines an SDK harness container for executing Dataflow pipelines.
            "containerImage": "A String", # A docker container image that resides in Google Container Registry.
            "useSingleCorePerContainer": True or False, # If true, recommends that the Dataflow service use only one core per SDK
                # container instance with this image. If false (or unset), recommends using
                # more than one core per SDK container instance with this image for
                # efficiency. Note that the Dataflow service may choose to override this
                # property if needed.
          },
        ],
      },
    ],
    "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
        # unspecified, the service will attempt to choose a reasonable
        # default. This should be in the form of the API service name,
        # e.g. "compute.googleapis.com".
    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
        # storage. The system will append the suffix "/temp-{JOBNAME}" to
        # this resource prefix, where {JOBNAME} is the value of the
        # job_name field. The resulting bucket and object prefix is used
        # as the prefix of the resources used to store temporary data
        # needed during the job execution. NOTE: This will override the
        # value in taskrunner_settings.
        # The supported resource type is:
        #
        # Google Cloud Storage:
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
  },
  "location": "A String", # The [regional endpoint]
      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
      # contains this job.
  "tempFiles": [ # A set of files the system should be aware of that are used
      # for temporary storage. These temporary files will be
      # removed on job completion.
      # No duplicates are allowed.
      # No file patterns are supported.
      #
      # The supported files are:
      #
      # Google Cloud Storage:
      #   storage.googleapis.com/{bucket}/{object}
      #   bucket.storage.googleapis.com/{object}
    "A String",
  ],
  "type": "A String", # The type of Cloud Dataflow job.
  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
      # If this field is set, the service will ensure its uniqueness.
      # The request to create a job will fail if the service has knowledge of a
      # previously submitted job with the same client's ID and job name.
      # The caller may use this field to ensure idempotence of job
      # creation across retried attempts to create a job.
      # By default, the field is empty and, in that case, the service ignores it.
  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
      # snapshot.
  "stepsLocation": "A String", # The GCS location where the steps are stored.
  "currentStateTime": "A String", # The timestamp associated with the current state.
  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
      # Flexible resource scheduling jobs are started with some delay after job
      # creation, so start_time is unset before start and is updated when the
      # job is started by the Cloud Dataflow service. For other jobs, start_time
      # always equals create_time and is immutable and set by the Cloud Dataflow
      # service.
  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
      # Cloud Dataflow service.
  "requestedState": "A String", # The job's requested state.
      #
      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
      # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
      # also be used to directly set a job's requested state to
      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
      # job if it has not already reached a terminal state.
  "name": "A String", # The user-specified Cloud Dataflow job name.
      #
      # Only one Job with a given name may exist in a project at any
      # given time. If a caller attempts to create a Job with the same
      # name as an already-existing Job, the attempt returns the
      # existing Job.
      #
      # The name must match the regular expression
      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  "steps": [ # Exactly one of step or steps_location should be specified.
      #
      # The top-level steps that constitute the entire job.
    { # Defines a particular step within a Cloud Dataflow job.
        #
        # A job consists of multiple steps, each of which performs some
        # specific operation as part of the overall job. Data is typically
        # passed from one step to another as part of the job.
        #
        # Here's an example of a sequence of steps which together implement a
        # Map-Reduce job:
        #
        #   * Read a collection of data from some source, parsing the
        #     collection's elements.
        #
        #   * Validate the elements.
        #
        #   * Apply a user-defined function to map each element to some value
        #     and extract an element-specific key value.
        #
        #   * Group elements with the same key into a single element with
        #     that key, transforming a multiply-keyed collection into a
        #     uniquely-keyed collection.
        #
        #   * Write the elements out to some data sink.
        #
        # Note that the Cloud Dataflow service may be used to run many different
        # types of jobs, not just Map-Reduce.
      "kind": "A String", # The kind of step in the Cloud Dataflow job.
      "name": "A String", # The name that identifies the step. This must be unique for each
          # step with respect to all other steps in the Cloud Dataflow job.
      "properties": { # Named properties associated with the step. Each kind of
          # predefined step has its own required set of properties.
          # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
        "a_key": "", # Properties of the object.
      },
    },
  ],
  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
      # of the job it replaced.
      #
      # When sending a `CreateJobRequest`, you can update a job by specifying it
      # here. The job named here is stopped, and its intermediate state is
      # transferred to this job.
  "currentState": "A String", # The current state of the job.
      #
      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
      # specified.
      #
      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
      # terminal state. After a job has reached a terminal state, no
      # further state updates may be made.
      #
      # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
  "executionInfo": { # Deprecated.
      # Additional information about how a Cloud Dataflow job will be executed
      # that isn't contained in the submitted job.
    "stages": { # A mapping from each stage to the information about that stage.
      "a_key": { # Contains information about how a particular
          # google.dataflow.v1beta3.Step will be executed.
        "stepName": [ # The steps associated with the execution stage.
            # Note that stages may have several steps, and that a given step
            # might be run by more than one stage.
          "A String",
        ],
      },
    },
  },
}</pre>
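<p>For orientation, here is a minimal usage sketch of this method with the Python client. It assumes Application Default Credentials are configured; the project ID, job name, and output/temp bucket paths below are placeholders, the template path is the public Word_Count template (whose <code>inputFile</code> and <code>output</code> parameters are template-specific), and the polling loop uses the companion <code>projects.locations.jobs.get</code> method. Treat it as a sketch under those assumptions, not a definitive recipe.</p>
<pre>
import time

from googleapiclient.discovery import build

# Build the Dataflow API client (authenticates with Application
# Default Credentials).
dataflow = build("dataflow", "v1b3")

PROJECT = "my-project"    # placeholder project ID
LOCATION = "us-central1"  # regional endpoint to direct the request to

# Create a job from a staged template. "gcsPath" and "jobName" are
# required; "parameters" must match what the template itself declares.
job = dataflow.projects().locations().templates().create(
    projectId=PROJECT,
    location=LOCATION,
    body={
        "jobName": "wordcount-from-template",
        "gcsPath": "gs://dataflow-templates/latest/Word_Count",
        "parameters": {
            "inputFile": "gs://dataflow-samples/shakespeare/kinglear.txt",
            "output": "gs://my-bucket/wordcount/output",  # placeholder bucket
        },
        "environment": {
            "tempLocation": "gs://my-bucket/temp",  # placeholder bucket
            "maxWorkers": 3,
        },
    },
).execute()

print("Created job %s in state %s" % (job["id"], job.get("currentState")))

# Poll until the job reaches a terminal state. "currentState" is mutated
# by the Cloud Dataflow service as the job progresses; callers cannot
# mutate it.
TERMINAL_STATES = (
    "JOB_STATE_DONE",
    "JOB_STATE_FAILED",
    "JOB_STATE_CANCELLED",
    "JOB_STATE_UPDATED",
    "JOB_STATE_DRAINED",
)
while job.get("currentState") not in TERMINAL_STATES:
    time.sleep(30)
    job = dataflow.projects().locations().jobs().get(
        projectId=PROJECT, location=LOCATION, jobId=job["id"]
    ).execute()

print("Final state: %s" % job["currentState"])
</pre>
<p>The returned dict is the Job object described above, so fields such as <code>id</code>, <code>createTime</code>, and <code>currentState</code> can be read directly from it.</p>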
</div>

</body></html>