blob: b26398f1240a6f9792384634365be552ad358d2b [file] [log] [blame]
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13}
14
15body {
16 font-size: 13px;
17 padding: 1em;
18}
19
20h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23}
24
25h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28}
29
30h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34}
35
36pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42 margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54 margin-top: 0.5em;
55}
56
57.firstline {
58 margin-left: 2em;
59}
60
61.method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66}
67
68.details {
69 font-weight: bold;
70 font-size: 14px;
71}
72
73</style>
74
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070075<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070076<h2>Instance Methods</h2>
77<p class="toc_element">
Dan O'Mearadd494642020-05-01 07:42:23 -070078 <code><a href="#create">create(projectId, body=None, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040079<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -040081 <code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040082<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
Dan O'Mearadd494642020-05-01 07:42:23 -070084 <code><a href="#launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040085<p class="firstline">Launch a template.</p>
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070086<h3>Method Details</h3>
87<div class="method">
Dan O'Mearadd494642020-05-01 07:42:23 -070088 <code class="details" id="create">create(projectId, body=None, x__xgafv=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040089 <pre>Creates a Cloud Dataflow job from a template.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070090
91Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040092 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Dan O'Mearadd494642020-05-01 07:42:23 -070093 body: object, The request body.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070094 The object takes the form of:
95
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040096{ # A request to create a Cloud Dataflow job from a template.
97 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -070098 "workerRegion": "A String", # The Compute Engine region
99 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
100 # which worker processing should occur, e.g. "us-west1". Mutually exclusive
101 # with worker_zone. If neither worker_region nor worker_zone is specified,
102 # default to the control plane's region.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400103 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
104 # template if not specified.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700105 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
106 # the service will use the network "default".
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400107 "zone": "A String", # The Compute Engine [availability
108 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400109 # for launching worker instances to run your pipeline.
Dan O'Mearadd494642020-05-01 07:42:23 -0700110 # In the future, worker_zone will take precedence.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700111 "additionalUserLabels": { # Additional user labels to be specified for the job.
112 # Keys and values should follow the restrictions specified in the [labeling
113 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
114 # page.
115 "a_key": "A String",
116 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700117     "numWorkers": 42, # The initial number of Google Compute Engine instances for the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700118 "additionalExperiments": [ # Additional experiment flags for the job.
119 "A String",
120 ],
Dan O'Mearadd494642020-05-01 07:42:23 -0700121 "ipConfiguration": "A String", # Configuration for VM IPs.
Thomas Coffee2f245372017-03-27 10:39:26 -0700122 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
123 # Use with caution.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400124 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
125 # Must be a valid Cloud Storage URL, beginning with `gs://`.
126 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
Dan O'Mearadd494642020-05-01 07:42:23 -0700127 "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.
128 # Key format is:
129 # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400130 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
131 # available to your pipeline during execution, from 1 to 1000.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700132 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
133 # the form "regions/REGION/subnetworks/SUBNETWORK".
Dan O'Mearadd494642020-05-01 07:42:23 -0700134 "workerZone": "A String", # The Compute Engine zone
135 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
136 # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
137 # with worker_region. If neither worker_region nor worker_zone is specified,
138 # a zone in the control plane's region is chosen based on available capacity.
139 # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800140 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400141 "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
142 # create the job.
143 # Must be a valid Cloud Storage URL, beginning with `gs://`.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700144 "location": "A String", # The [regional endpoint]
145 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
146 # which to direct the request.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400147 "parameters": { # The runtime parameters to pass to the job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700148 "a_key": "A String",
149 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400150 "jobName": "A String", # Required. The job name to use for the created job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700151 }
152
153 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400154 Allowed values
155 1 - v1 error format
156 2 - v2 error format
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700157
158Returns:
159 An object of the form:
160
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400161 { # Defines a job to be run by the Cloud Dataflow service.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700162 "labels": { # User-defined labels for this job.
163 #
164 # The labels map can contain no more than 64 entries. Entries of the labels
165 # map are UTF8 strings that comply with the following restrictions:
166 #
167 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
168 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
Dan O'Mearadd494642020-05-01 07:42:23 -0700169 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700170 # size.
171 "a_key": "A String",
172 },
173 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
174 # by the metadata values provided here. Populated for ListJobs and all GetJob
175 # views SUMMARY and higher.
176 # ListJob response and Job SUMMARY view.
177 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
178 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
179 "version": "A String", # The version of the SDK used to run the job.
180 "sdkSupportStatus": "A String", # The support status for this SDK version.
181 },
182 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
183 { # Metadata for a PubSub connector used by the job.
184 "topic": "A String", # Topic accessed in the connection.
185 "subscription": "A String", # Subscription used in the connection.
186 },
187 ],
188 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
189 { # Metadata for a Datastore connector used by the job.
190 "projectId": "A String", # ProjectId accessed in the connection.
191 "namespace": "A String", # Namespace used in the connection.
192 },
193 ],
194 "fileDetails": [ # Identification of a File source used in the Dataflow job.
195 { # Metadata for a File connector used by the job.
196 "filePattern": "A String", # File Pattern used to access files by the connector.
197 },
198 ],
199 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
200 { # Metadata for a Spanner connector used by the job.
201 "instanceId": "A String", # InstanceId accessed in the connection.
202 "projectId": "A String", # ProjectId accessed in the connection.
203 "databaseId": "A String", # DatabaseId accessed in the connection.
204 },
205 ],
206 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
207 { # Metadata for a BigTable connector used by the job.
208 "instanceId": "A String", # InstanceId accessed in the connection.
209 "projectId": "A String", # ProjectId accessed in the connection.
210 "tableId": "A String", # TableId accessed in the connection.
211 },
212 ],
213 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
214 { # Metadata for a BigQuery connector used by the job.
215 "projectId": "A String", # Project accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700216 "query": "A String", # Query used to access data in the connection.
Dan O'Mearadd494642020-05-01 07:42:23 -0700217 "table": "A String", # Table accessed in the connection.
218 "dataset": "A String", # Dataset accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700219 },
220 ],
221 },
222 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
223 # A description of the user pipeline and stages through which it is executed.
224 # Created by Cloud Dataflow service. Only retrieved with
225 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
226 # form. This data is provided by the Dataflow service for ease of visualizing
227 # the pipeline and interpreting Dataflow provided metrics.
228 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
229 { # Description of the type, names/ids, and input/outputs for a transform.
230 "kind": "A String", # Type of transform.
231 "name": "A String", # User provided name for this transform instance.
232 "inputCollectionName": [ # User names for all collection inputs to this transform.
233 "A String",
234 ],
235 "displayData": [ # Transform-specific display data.
236 { # Data provided with a pipeline or transform to provide descriptive info.
Dan O'Mearadd494642020-05-01 07:42:23 -0700237 "key": "A String", # The key identifying the display data.
238 # This is intended to be used as a label for the display data
239 # when viewed in a dax monitoring system.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700240 "shortStrValue": "A String", # A possible additional shorter value to display.
241 # For example a java_class_name_value of com.mypackage.MyDoFn
242 # will be stored with MyDoFn as the short_str_value and
243 # com.mypackage.MyDoFn as the java_class_name value.
244 # short_str_value can be displayed and java_class_name_value
245 # will be displayed as a tooltip.
Dan O'Mearadd494642020-05-01 07:42:23 -0700246 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700247 "url": "A String", # An optional full URL.
248 "floatValue": 3.14, # Contains value if the data is of float type.
249 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
250 # language namespace (i.e. python module) which defines the display data.
251 # This allows a dax monitoring system to specially handle the data
252 # and perform custom rendering.
253 "javaClassValue": "A String", # Contains value if the data is of java class type.
254 "label": "A String", # An optional label to display in a dax UI for the element.
255 "boolValue": True or False, # Contains value if the data is of a boolean type.
256 "strValue": "A String", # Contains value if the data is of string type.
Dan O'Mearadd494642020-05-01 07:42:23 -0700257 "durationValue": "A String", # Contains value if the data is of duration type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700258 "int64Value": "A String", # Contains value if the data is of int64 type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700259 },
260 ],
261 "outputCollectionName": [ # User names for all collection outputs to this transform.
262 "A String",
263 ],
264 "id": "A String", # SDK generated id of this transform instance.
265 },
266 ],
267 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
268 { # Description of the composing transforms, names/ids, and input/outputs of a
269 # stage of execution. Some composing transforms and sources may have been
270 # generated by the Dataflow service during execution planning.
271 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
272 { # Description of an interstitial value between transforms in an execution
273 # stage.
274 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
275 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
276 # source is most closely associated.
277 "name": "A String", # Dataflow service generated name for this source.
278 },
279 ],
280           "kind": "A String", # Type of transform this stage is executing.
281 "name": "A String", # Dataflow service generated name for this stage.
282 "outputSource": [ # Output sources for this stage.
283 { # Description of an input or output of an execution stage.
284 "userName": "A String", # Human-readable name for this source; may be user or system generated.
285 "sizeBytes": "A String", # Size of the source, if measurable.
286 "name": "A String", # Dataflow service generated name for this source.
287 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
288 # source is most closely associated.
289 },
290 ],
291 "inputSource": [ # Input sources for this stage.
292 { # Description of an input or output of an execution stage.
293 "userName": "A String", # Human-readable name for this source; may be user or system generated.
294 "sizeBytes": "A String", # Size of the source, if measurable.
295 "name": "A String", # Dataflow service generated name for this source.
296 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
297 # source is most closely associated.
298 },
299 ],
300 "componentTransform": [ # Transforms that comprise this execution stage.
301 { # Description of a transform executed as part of an execution stage.
302 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
303 "originalTransform": "A String", # User name for the original user transform with which this transform is
304 # most closely associated.
305 "name": "A String", # Dataflow service generated name for this source.
306 },
307 ],
308 "id": "A String", # Dataflow service generated id for this stage.
309 },
310 ],
311 "displayData": [ # Pipeline level display data.
312 { # Data provided with a pipeline or transform to provide descriptive info.
Dan O'Mearadd494642020-05-01 07:42:23 -0700313 "key": "A String", # The key identifying the display data.
314 # This is intended to be used as a label for the display data
315 # when viewed in a dax monitoring system.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700316 "shortStrValue": "A String", # A possible additional shorter value to display.
317 # For example a java_class_name_value of com.mypackage.MyDoFn
318 # will be stored with MyDoFn as the short_str_value and
319 # com.mypackage.MyDoFn as the java_class_name value.
320 # short_str_value can be displayed and java_class_name_value
321 # will be displayed as a tooltip.
Dan O'Mearadd494642020-05-01 07:42:23 -0700322 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700323 "url": "A String", # An optional full URL.
324 "floatValue": 3.14, # Contains value if the data is of float type.
325 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
326 # language namespace (i.e. python module) which defines the display data.
327 # This allows a dax monitoring system to specially handle the data
328 # and perform custom rendering.
329 "javaClassValue": "A String", # Contains value if the data is of java class type.
330 "label": "A String", # An optional label to display in a dax UI for the element.
331 "boolValue": True or False, # Contains value if the data is of a boolean type.
332 "strValue": "A String", # Contains value if the data is of string type.
Dan O'Mearadd494642020-05-01 07:42:23 -0700333 "durationValue": "A String", # Contains value if the data is of duration type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700334 "int64Value": "A String", # Contains value if the data is of int64 type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700335 },
336 ],
337 },
338 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
339 # callers cannot mutate it.
340 { # A message describing the state of a particular execution stage.
341 "executionStageName": "A String", # The name of the execution stage.
342 "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
343 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
344 },
345 ],
346 "id": "A String", # The unique ID of this job.
347 #
348 # This field is set by the Cloud Dataflow service when the Job is
349 # created, and is immutable for the life of the job.
350 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
351 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
352 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
353 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
354 # corresponding name prefixes of the new job.
355 "a_key": "A String",
356 },
357 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700358 "workerRegion": "A String", # The Compute Engine region
359 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
360 # which worker processing should occur, e.g. "us-west1". Mutually exclusive
361 # with worker_zone. If neither worker_region nor worker_zone is specified,
362 # default to the control plane's region.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700363 "version": { # A structure describing which components and their versions of the service
364 # are required in order to run the job.
365 "a_key": "", # Properties of the object.
366 },
367 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
368 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
369 # at rest, AKA a Customer Managed Encryption Key (CMEK).
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400370 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700371 # Format:
372 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
373 "internalExperiments": { # Experimental settings.
374 "a_key": "", # Properties of the object. Contains field @type with type URL.
375 },
376 "dataset": "A String", # The dataset for the current project where various workflow
377 # related tables are stored.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400378 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700379 # The supported resource type is:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400380 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700381 # Google BigQuery:
382 # bigquery.googleapis.com/{dataset}
383 "experiments": [ # The list of experiments to enable.
384 "A String",
385 ],
386 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
387 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
388 # options are passed through the service and are used to recreate the
389 # SDK pipeline options on the worker in a language agnostic and platform
390 # independent way.
391 "a_key": "", # Properties of the object.
392 },
393 "userAgent": { # A description of the process that generated the request.
394 "a_key": "", # Properties of the object.
395 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700396 "workerZone": "A String", # The Compute Engine zone
397 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
398 # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
399 # with worker_region. If neither worker_region nor worker_zone is specified,
400 # a zone in the control plane's region is chosen based on available capacity.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700401 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
402 # specified in order for the job to have workers.
403 { # Describes one particular pool of Cloud Dataflow workers to be
404 # instantiated by the Cloud Dataflow service in order to perform the
405 # computations required by a job. Note that a workflow job may use
406 # multiple pools, in order to match the various computational
407 # requirements of the various stages of the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700408 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
409 # harness, residing in Google Container Registry.
410 #
411 # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
412 "ipConfiguration": "A String", # Configuration for VM IPs.
413 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
414 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
415 "algorithm": "A String", # The algorithm to use for autoscaling.
416 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700417 "diskSourceImage": "A String", # Fully qualified source image for disks.
Dan O'Mearadd494642020-05-01 07:42:23 -0700418 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
419 # the service will use the network "default".
420 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
421 # will attempt to choose a reasonable default.
422 "metadata": { # Metadata to set on the Google Compute Engine VMs.
423 "a_key": "A String",
424 },
425 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
426 # service will attempt to choose a reasonable default.
427 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
428 # Compute Engine API.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700429 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
430 # using the standard Dataflow task runner. Users should ignore
431 # this field.
432 "workflowFileName": "A String", # The file to store the workflow in.
433 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
434 # will not be uploaded.
435 #
436 # The supported resource type is:
437 #
438 # Google Cloud Storage:
439 # storage.googleapis.com/{bucket}/{object}
440 # bucket.storage.googleapis.com/{object}
441 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
Dan O'Mearadd494642020-05-01 07:42:23 -0700442 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
443 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
444 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
445 "vmId": "A String", # The ID string of the VM.
446 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
447 # taskrunner; e.g. "wheel".
448 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
449 # taskrunner; e.g. "root".
450 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
451 # access the Cloud Dataflow API.
452 "A String",
453 ],
454 "languageHint": "A String", # The suggested backend language.
455 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
456 # console.
457 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
458 "logDir": "A String", # The directory on the VM to store logs.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700459 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
460 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
461 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
462 # "shuffle/v1beta1".
463 "workerId": "A String", # The ID of the worker running this pipeline.
464 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
465 #
466 # When workers access Google Cloud APIs, they logically do so via
467 # relative URLs. If this field is specified, it supplies the base
468 # URL to use for resolving these relative URLs. The normative
469 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
470 # Locators".
471 #
472 # If not specified, the default value is "http://www.googleapis.com/"
473 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
474 # "dataflow/v1b3/projects".
475 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
476 # storage.
477 #
478 # The supported resource type is:
479 #
480 # Google Cloud Storage:
481 #
482 # storage.googleapis.com/{bucket}/{object}
483 # bucket.storage.googleapis.com/{object}
484 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700485 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
486 "harnessCommand": "A String", # The command to launch the worker harness.
487 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
488 # temporary storage.
489 #
490 # The supported resource type is:
491 #
492 # Google Cloud Storage:
493 # storage.googleapis.com/{bucket}/{object}
494 # bucket.storage.googleapis.com/{object}
Dan O'Mearadd494642020-05-01 07:42:23 -0700495 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
496 #
497 # When workers access Google Cloud APIs, they logically do so via
498 # relative URLs. If this field is specified, it supplies the base
499 # URL to use for resolving these relative URLs. The normative
500 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
501 # Locators".
502 #
503 # If not specified, the default value is "http://www.googleapis.com/"
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700504 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700505 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
506 # service will choose a number of threads (according to the number of cores
507 # on the selected machine type for batch, or 1 by convention for streaming).
508 "poolArgs": { # Extra arguments for this worker pool.
509 "a_key": "", # Properties of the object. Contains field @type with type URL.
510 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700511 "packages": [ # Packages to be installed on workers.
512 { # The packages that must be installed in order for a worker to run the
513 # steps of the Cloud Dataflow job that will be assigned to its worker
514 # pool.
515 #
516 # This is the mechanism by which the Cloud Dataflow SDK causes code to
517 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
518 # might use this to install jars containing the user's code and all of the
519 # various dependencies (libraries, data files, etc.) required in order
520 # for that code to run.
521 "location": "A String", # The resource to read the package from. The supported resource type is:
522 #
523 # Google Cloud Storage:
524 #
525 # storage.googleapis.com/{bucket}
526 # bucket.storage.googleapis.com/
527 "name": "A String", # The name of the package.
528 },
529 ],
Dan O'Mearadd494642020-05-01 07:42:23 -0700530 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
531 # select a default set of packages which are useful to worker
532 # harnesses written in a particular language.
533 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
534 # are supported.
535 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700536 # attempt to choose a reasonable default.
537 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
538 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
539 # `TEARDOWN_NEVER`.
540 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
541 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
542 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
543 # down.
544 #
545 # If the workers are not torn down by the service, they will
546 # continue to run and use Google Compute Engine VM resources in the
547 # user's project until they are explicitly terminated by the user.
548 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
549 # policy except for small, manually supervised test jobs.
550 #
551 # If unknown or unspecified, the service will attempt to choose a reasonable
552 # default.
Dan O'Mearadd494642020-05-01 07:42:23 -0700553 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
554 # attempt to choose a reasonable default.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700555 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
556 # execute the job. If zero or unspecified, the service will
557 # attempt to choose a reasonable default.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700558 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
559 # the form "regions/REGION/subnetworks/SUBNETWORK".
560 "dataDisks": [ # Data disks that are used by a VM in this workflow.
561 { # Describes the data disk used by a workflow job.
562 "mountPoint": "A String", # Directory in a VM where disk is mounted.
563 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
564 # attempt to choose a reasonable default.
565 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
566 # must be a disk type appropriate to the project and zone in which
567 # the workers will run. If unknown or unspecified, the service
568 # will attempt to choose a reasonable default.
569 #
570 # For example, the standard persistent disk type is a resource name
571 # typically ending in "pd-standard". If SSD persistent disks are
572 # available, the resource name typically ends with "pd-ssd". The
573              # actual valid values are defined by the Google Compute Engine API,
574 # not by the Cloud Dataflow API; consult the Google Compute Engine
575 # documentation for more information about determining the set of
576 # available disk types for a particular project and zone.
577 #
578 # Google Compute Engine Disk types are local to a particular
579 # project in a particular zone, and so the resource name will
580 # typically look something like this:
581 #
582 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
583 },
584 ],
Dan O'Mearadd494642020-05-01 07:42:23 -0700585 "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
586 # only be set in the Fn API path. For non-cross-language pipelines this
587 # should have only one entry. Cross-language pipelines will have two or more
588 # entries.
589            { # Defines an SDK harness container for executing Dataflow pipelines.
590 "containerImage": "A String", # A docker container image that resides in Google Container Registry.
591 "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
592 # container instance with this image. If false (or unset) recommends using
593 # more than one core per SDK container instance with this image for
594 # efficiency. Note that Dataflow service may choose to override this property
595 # if needed.
596 },
597 ],
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700598 },
599 ],
Dan O'Mearadd494642020-05-01 07:42:23 -0700600 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
601 # unspecified, the service will attempt to choose a reasonable
602 # default. This should be in the form of the API service name,
603 # e.g. "compute.googleapis.com".
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700604 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
605          # storage.  The system will append the suffix &quot;/temp-{JOBNAME}&quot; to
606 # this resource prefix, where {JOBNAME} is the value of the
607 # job_name field. The resulting bucket and object prefix is used
608 # as the prefix of the resources used to store temporary data
609 # needed during the job execution. NOTE: This will override the
610 # value in taskrunner_settings.
611 # The supported resource type is:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400612 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700613 # Google Cloud Storage:
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400614 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700615 # storage.googleapis.com/{bucket}/{object}
616 # bucket.storage.googleapis.com/{object}
617 },
618 "location": "A String", # The [regional endpoint]
619 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
620 # contains this job.
621 "tempFiles": [ # A set of files the system should be aware of that are used
622 # for temporary storage. These temporary files will be
623 # removed on job completion.
624 # No duplicates are allowed.
625 # No file patterns are supported.
626 #
627 # The supported files are:
628 #
629 # Google Cloud Storage:
630 #
631 # storage.googleapis.com/{bucket}/{object}
632 # bucket.storage.googleapis.com/{object}
633 "A String",
634 ],
635 "type": "A String", # The type of Cloud Dataflow job.
636 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
637 # If this field is set, the service will ensure its uniqueness.
638 # The request to create a job will fail if the service has knowledge of a
639 # previously submitted job with the same client's ID and job name.
640 # The caller may use this field to ensure idempotence of job
641 # creation across retried attempts to create a job.
642 # By default, the field is empty and, in that case, the service ignores it.
643 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
644 # snapshot.
645 "stepsLocation": "A String", # The GCS location where the steps are stored.
646 "currentStateTime": "A String", # The timestamp associated with the current state.
647 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
648 # Flexible resource scheduling jobs are started with some delay after job
649 # creation, so start_time is unset before start and is updated when the
650 # job is started by the Cloud Dataflow service. For other jobs, start_time
651 # always equals to create_time and is immutable and set by the Cloud Dataflow
652 # service.
653 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
654 # Cloud Dataflow service.
655 "requestedState": "A String", # The job's requested state.
656 #
657 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
658 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
659 # also be used to directly set a job's requested state to
660 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
661 # job if it has not already reached a terminal state.
662 "name": "A String", # The user-specified Cloud Dataflow job name.
663 #
664 # Only one Job with a given name may exist in a project at any
665 # given time. If a caller attempts to create a Job with the same
666 # name as an already-existing Job, the attempt returns the
667 # existing Job.
668 #
669 # The name must match the regular expression
670 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
671 "steps": [ # Exactly one of step or steps_location should be specified.
672 #
673 # The top-level steps that constitute the entire job.
674 { # Defines a particular step within a Cloud Dataflow job.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400675 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700676 # A job consists of multiple steps, each of which performs some
677 # specific operation as part of the overall job. Data is typically
678 # passed from one step to another as part of the job.
679 #
680 # Here's an example of a sequence of steps which together implement a
681 # Map-Reduce job:
682 #
683 # * Read a collection of data from some source, parsing the
684 # collection's elements.
685 #
686 # * Validate the elements.
687 #
688 # * Apply a user-defined function to map each element to some value
689 # and extract an element-specific key value.
690 #
691 # * Group elements with the same key into a single element with
692 # that key, transforming a multiply-keyed collection into a
693 # uniquely-keyed collection.
694 #
695 # * Write the elements out to some data sink.
696 #
697 # Note that the Cloud Dataflow service may be used to run many different
698 # types of jobs, not just Map-Reduce.
699 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700700 "name": "A String", # The name that identifies the step. This must be unique for each
701 # step with respect to all other steps in the Cloud Dataflow job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700702 "properties": { # Named properties associated with the step. Each kind of
703 # predefined step has its own required set of properties.
704 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
705 "a_key": "", # Properties of the object.
706 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700707 },
708 ],
709 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
710 # of the job it replaced.
711 #
712 # When sending a `CreateJobRequest`, you can update a job by specifying it
713 # here. The job named here is stopped, and its intermediate state is
714 # transferred to this job.
715 "currentState": "A String", # The current state of the job.
716 #
717 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
718 # specified.
719 #
720 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
721 # terminal state. After a job has reached a terminal state, no
722 # further state updates may be made.
723 #
724 # This field may be mutated by the Cloud Dataflow service;
725 # callers cannot mutate it.
726 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
727 # isn't contained in the submitted job.
728 "stages": { # A mapping from each stage to the information about that stage.
729 "a_key": { # Contains information about how a particular
730 # google.dataflow.v1beta3.Step will be executed.
731 "stepName": [ # The steps associated with the execution stage.
732 # Note that stages may have several steps, and that a given step
733 # might be run by more than one stage.
734 "A String",
735 ],
736 },
737 },
738 },
739 }</pre>
740</div>
741
742<div class="method">
743 <code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>
744 <pre>Get the template associated with a template.
745
746Args:
747 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
748 gcsPath: string, Required. A Cloud Storage path to the template from which to
749create the job.
750Must be a valid Cloud Storage URL, beginning with 'gs://'.
751 location: string, The [regional endpoint]
752(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
753which to direct the request.
754 x__xgafv: string, V1 error format.
755 Allowed values
756 1 - v1 error format
757 2 - v2 error format
758 view: string, The view to retrieve. Defaults to METADATA_ONLY.
759
760Returns:
761 An object of the form:
762
763 { # The response to a GetTemplate request.
764 "status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
765 # request will be indicated in the error_details.
766 # different programming environments, including REST APIs and RPC APIs. It is
Dan O'Mearadd494642020-05-01 07:42:23 -0700767 # used by [gRPC](https://github.com/grpc). Each `Status` message contains
768 # three pieces of data: error code, error message, and error details.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700769 #
Dan O'Mearadd494642020-05-01 07:42:23 -0700770 # You can find out more about this error model and how to work with it in the
771 # [API Design Guide](https://cloud.google.com/apis/design/errors).
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700772 "message": "A String", # A developer-facing error message, which should be in English. Any
773 # user-facing error message should be localized and sent in the
774 # google.rpc.Status.details field, or localized by the client.
775 "code": 42, # The status code, which should be an enum value of google.rpc.Code.
776 "details": [ # A list of messages that carry the error details. There is a common set of
777 # message types for APIs to use.
778 {
779 "a_key": "", # Properties of the object. Contains field @type with type URL.
780 },
781 ],
782 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700783 "templateType": "A String", # Template Type.
784 "runtimeMetadata": { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters.
785 "parameters": [ # The parameters for the template.
786 { # Metadata for a specific parameter.
787 "name": "A String", # Required. The name of the parameter.
788 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
789 "regexes": [ # Optional. Regexes that the parameter must match.
790 "A String",
791 ],
792 "label": "A String", # Required. The label to display for the parameter.
793 "helpText": "A String", # Required. The help text to display for the parameter.
794 "paramType": "A String", # Optional. The type of the parameter.
795 # Used for selecting input picker.
796 },
797 ],
798 "sdkInfo": { # SDK Information. # SDK Info for the template.
799 "version": "A String", # Optional. The SDK version.
800 "language": "A String", # Required. The SDK Language.
801 },
802 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700803 "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
804 # parameters, etc.
805 "name": "A String", # Required. The name of the template.
806 "parameters": [ # The parameters for the template.
807 { # Metadata for a specific parameter.
Dan O'Mearadd494642020-05-01 07:42:23 -0700808 "name": "A String", # Required. The name of the parameter.
809 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700810 "regexes": [ # Optional. Regexes that the parameter must match.
811 "A String",
812 ],
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700813 "label": "A String", # Required. The label to display for the parameter.
Dan O'Mearadd494642020-05-01 07:42:23 -0700814 "helpText": "A String", # Required. The help text to display for the parameter.
815 "paramType": "A String", # Optional. The type of the parameter.
816 # Used for selecting input picker.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700817 },
818 ],
819 "description": "A String", # Optional. A description of the template.
820 },
821 }</pre>
822</div>
823
824<div class="method">
Dan O'Mearadd494642020-05-01 07:42:23 -0700825 <code class="details" id="launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</code>
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700826 <pre>Launch a template.
827
828Args:
829 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Dan O'Mearadd494642020-05-01 07:42:23 -0700830 body: object, The request body.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700831 The object takes the form of:
832
833{ # Parameters to provide to the template being launched.
834 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -0700835 "workerRegion": "A String", # The Compute Engine region
836 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
837 # which worker processing should occur, e.g. "us-west1". Mutually exclusive
838 # with worker_zone. If neither worker_region nor worker_zone is specified,
839 # default to the control plane's region.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700840 "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
841 # template if not specified.
842 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
843 # the service will use the network "default".
844 "zone": "A String", # The Compute Engine [availability
845 # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
846 # for launching worker instances to run your pipeline.
Dan O'Mearadd494642020-05-01 07:42:23 -0700847 # In the future, worker_zone will take precedence.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700848 "additionalUserLabels": { # Additional user labels to be specified for the job.
849 # Keys and values should follow the restrictions specified in the [labeling
850 # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
851 # page.
852 "a_key": "A String",
853 },
Dan O&#39;Mearadd494642020-05-01 07:42:23 -0700854    &quot;numWorkers&quot;: 42, # The initial number of Google Compute Engine instances for the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700855 "additionalExperiments": [ # Additional experiment flags for the job.
856 "A String",
857 ],
Dan O'Mearadd494642020-05-01 07:42:23 -0700858 "ipConfiguration": "A String", # Configuration for VM IPs.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700859 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
860 # Use with caution.
861 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
862 # Must be a valid Cloud Storage URL, beginning with `gs://`.
863 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
Dan O'Mearadd494642020-05-01 07:42:23 -0700864 "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.
865 # Key format is:
866 # projects/&lt;project&gt;/locations/&lt;location&gt;/keyRings/&lt;keyring&gt;/cryptoKeys/&lt;key&gt;
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700867 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
868 # available to your pipeline during execution, from 1 to 1000.
869 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
870 # the form "regions/REGION/subnetworks/SUBNETWORK".
Dan O'Mearadd494642020-05-01 07:42:23 -0700871 "workerZone": "A String", # The Compute Engine zone
872 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
873 # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
874 # with worker_region. If neither worker_region nor worker_zone is specified,
875 # a zone in the control plane's region is chosen based on available capacity.
876 # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700877 },
Dan O'Mearadd494642020-05-01 07:42:23 -0700878 "transformNameMapping": { # Only applicable when updating a pipeline. Map of transform name prefixes of
879 # the job to be replaced to the corresponding name prefixes of the new job.
880 "a_key": "A String",
881 },
882 "update": True or False, # If set, replace the existing pipeline with the name specified by jobName
883 # with this pipeline, preserving state.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700884 "parameters": { # The runtime parameters to pass to the job.
885 "a_key": "A String",
886 },
887 "jobName": "A String", # Required. The job name to use for the created job.
888 }
889
890 dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
891The file must be a JSON-serialized DynamicTemplateFileSpec object.
892 x__xgafv: string, V1 error format.
893 Allowed values
894 1 - v1 error format
895 2 - v2 error format
896 dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
897Must be a valid Cloud Storage URL, beginning with `gs://`.
898 location: string, The [regional endpoint]
899(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
900which to direct the request.
901 gcsPath: string, A Cloud Storage path to the template from which to create
902the job.
903Must be a valid Cloud Storage URL, beginning with 'gs://'.
904 validateOnly: boolean, If true, the request is validated but not actually executed.
905Defaults to false.
906
907Returns:
908 An object of the form:
909
910 { # Response to the request to launch a template.
911 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
912 # the job was successfully launched.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400913 "labels": { # User-defined labels for this job.
914 #
915 # The labels map can contain no more than 64 entries. Entries of the labels
916 # map are UTF8 strings that comply with the following restrictions:
917 #
918 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
919 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
Dan O'Mearadd494642020-05-01 07:42:23 -0700920 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400921 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700922 "a_key": "A String",
923 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700924 "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
925 # by the metadata values provided here. Populated for ListJobs and all GetJob
926 # views SUMMARY and higher.
927 # ListJob response and Job SUMMARY view.
928 "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
929 "versionDisplayName": "A String", # A readable string describing the version of the SDK.
930 "version": "A String", # The version of the SDK used to run the job.
931 "sdkSupportStatus": "A String", # The support status for this SDK version.
932 },
933 "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
934 { # Metadata for a PubSub connector used by the job.
935 "topic": "A String", # Topic accessed in the connection.
936 "subscription": "A String", # Subscription used in the connection.
937 },
938 ],
939 "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
940 { # Metadata for a Datastore connector used by the job.
941 "projectId": "A String", # ProjectId accessed in the connection.
942 "namespace": "A String", # Namespace used in the connection.
943 },
944 ],
945 "fileDetails": [ # Identification of a File source used in the Dataflow job.
946 { # Metadata for a File connector used by the job.
947 "filePattern": "A String", # File Pattern used to access files by the connector.
948 },
949 ],
950 "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
951 { # Metadata for a Spanner connector used by the job.
952 "instanceId": "A String", # InstanceId accessed in the connection.
953 "projectId": "A String", # ProjectId accessed in the connection.
954 "databaseId": "A String", # DatabaseId accessed in the connection.
955 },
956 ],
957 "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
958 { # Metadata for a BigTable connector used by the job.
959 "instanceId": "A String", # InstanceId accessed in the connection.
960 "projectId": "A String", # ProjectId accessed in the connection.
961 "tableId": "A String", # TableId accessed in the connection.
962 },
963 ],
964 "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
965 { # Metadata for a BigQuery connector used by the job.
966 "projectId": "A String", # Project accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700967 "query": "A String", # Query used to access data in the connection.
Dan O'Mearadd494642020-05-01 07:42:23 -0700968 "table": "A String", # Table accessed in the connection.
969 "dataset": "A String", # Dataset accessed in the connection.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700970 },
971 ],
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700972 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700973 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
974 # A description of the user pipeline and stages through which it is executed.
975 # Created by Cloud Dataflow service. Only retrieved with
976 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
977 # form. This data is provided by the Dataflow service for ease of visualizing
978 # the pipeline and interpreting Dataflow provided metrics.
979 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
980 { # Description of the type, names/ids, and input/outputs for a transform.
981 "kind": "A String", # Type of transform.
982 "name": "A String", # User provided name for this transform instance.
983 "inputCollectionName": [ # User names for all collection inputs to this transform.
984 "A String",
985 ],
986 "displayData": [ # Transform-specific display data.
987 { # Data provided with a pipeline or transform to provide descriptive info.
Dan O'Mearadd494642020-05-01 07:42:23 -0700988 "key": "A String", # The key identifying the display data.
989 # This is intended to be used as a label for the display data
990 # when viewed in a dax monitoring system.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700991 "shortStrValue": "A String", # A possible additional shorter value to display.
992 # For example a java_class_name_value of com.mypackage.MyDoFn
993 # will be stored with MyDoFn as the short_str_value and
994 # com.mypackage.MyDoFn as the java_class_name value.
995 # short_str_value can be displayed and java_class_name_value
996 # will be displayed as a tooltip.
Dan O'Mearadd494642020-05-01 07:42:23 -0700997 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -0700998 "url": "A String", # An optional full URL.
999 "floatValue": 3.14, # Contains value if the data is of float type.
1000 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1001 # language namespace (i.e. python module) which defines the display data.
1002 # This allows a dax monitoring system to specially handle the data
1003 # and perform custom rendering.
1004 "javaClassValue": "A String", # Contains value if the data is of java class type.
1005 "label": "A String", # An optional label to display in a dax UI for the element.
1006 "boolValue": True or False, # Contains value if the data is of a boolean type.
1007 "strValue": "A String", # Contains value if the data is of string type.
Dan O'Mearadd494642020-05-01 07:42:23 -07001008 "durationValue": "A String", # Contains value if the data is of duration type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001009 "int64Value": "A String", # Contains value if the data is of int64 type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001010 },
1011 ],
1012 "outputCollectionName": [ # User names for all collection outputs to this transform.
1013 "A String",
1014 ],
1015 "id": "A String", # SDK generated id of this transform instance.
1016 },
1017 ],
1018 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1019 { # Description of the composing transforms, names/ids, and input/outputs of a
1020 # stage of execution. Some composing transforms and sources may have been
1021 # generated by the Dataflow service during execution planning.
1022 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1023 { # Description of an interstitial value between transforms in an execution
1024 # stage.
1025 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1026 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1027 # source is most closely associated.
1028 "name": "A String", # Dataflow service generated name for this source.
1029 },
1030 ],
1031          &quot;kind&quot;: &quot;A String&quot;, # Type of transform this stage is executing.
1032 "name": "A String", # Dataflow service generated name for this stage.
1033 "outputSource": [ # Output sources for this stage.
1034 { # Description of an input or output of an execution stage.
1035 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1036 "sizeBytes": "A String", # Size of the source, if measurable.
1037 "name": "A String", # Dataflow service generated name for this source.
1038 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1039 # source is most closely associated.
1040 },
1041 ],
1042 "inputSource": [ # Input sources for this stage.
1043 { # Description of an input or output of an execution stage.
1044 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1045 "sizeBytes": "A String", # Size of the source, if measurable.
1046 "name": "A String", # Dataflow service generated name for this source.
1047 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1048 # source is most closely associated.
1049 },
1050 ],
1051 "componentTransform": [ # Transforms that comprise this execution stage.
1052 { # Description of a transform executed as part of an execution stage.
1053 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1054 "originalTransform": "A String", # User name for the original user transform with which this transform is
1055 # most closely associated.
1056 "name": "A String", # Dataflow service generated name for this source.
1057 },
1058 ],
1059 "id": "A String", # Dataflow service generated id for this stage.
1060 },
1061 ],
1062 "displayData": [ # Pipeline level display data.
1063 { # Data provided with a pipeline or transform to provide descriptive info.
Dan O'Mearadd494642020-05-01 07:42:23 -07001064 "key": "A String", # The key identifying the display data.
1065 # This is intended to be used as a label for the display data
1066 # when viewed in a dax monitoring system.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001067 "shortStrValue": "A String", # A possible additional shorter value to display.
1068 # For example a java_class_name_value of com.mypackage.MyDoFn
1069 # will be stored with MyDoFn as the short_str_value and
1070 # com.mypackage.MyDoFn as the java_class_name value.
1071 # short_str_value can be displayed and java_class_name_value
1072 # will be displayed as a tooltip.
Dan O'Mearadd494642020-05-01 07:42:23 -07001073 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001074 "url": "A String", # An optional full URL.
1075 "floatValue": 3.14, # Contains value if the data is of float type.
1076 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1077 # language namespace (i.e. python module) which defines the display data.
1078 # This allows a dax monitoring system to specially handle the data
1079 # and perform custom rendering.
1080 "javaClassValue": "A String", # Contains value if the data is of java class type.
1081 "label": "A String", # An optional label to display in a dax UI for the element.
1082 "boolValue": True or False, # Contains value if the data is of a boolean type.
1083 "strValue": "A String", # Contains value if the data is of string type.
Dan O'Mearadd494642020-05-01 07:42:23 -07001084 "durationValue": "A String", # Contains value if the data is of duration type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001085 "int64Value": "A String", # Contains value if the data is of int64 type.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001086 },
1087 ],
1088 },
1089 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1090 # callers cannot mutate it.
1091 { # A message describing the state of a particular execution stage.
1092 "executionStageName": "A String", # The name of the execution stage.
1093 "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1094 "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1095 },
1096 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001097 "id": "A String", # The unique ID of this job.
1098 #
1099 # This field is set by the Cloud Dataflow service when the Job is
1100 # created, and is immutable for the life of the job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001101 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1102 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1103 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1104 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1105 # corresponding name prefixes of the new job.
1106 "a_key": "A String",
1107 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001108 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
Dan O'Mearadd494642020-05-01 07:42:23 -07001109 "workerRegion": "A String", # The Compute Engine region
1110 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
1111 # which worker processing should occur, e.g. "us-west1". Mutually exclusive
1112 # with worker_zone. If neither worker_region nor worker_zone is specified,
1113 # default to the control plane's region.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001114 "version": { # A structure describing which components and their versions of the service
1115 # are required in order to run the job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001116 "a_key": "", # Properties of the object.
1117 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001118 "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1119 "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1120 # at rest, AKA a Customer Managed Encryption Key (CMEK).
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001121 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001122 # Format:
1123 # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001124 "internalExperiments": { # Experimental settings.
1125 "a_key": "", # Properties of the object. Contains field @type with type URL.
1126 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001127 "dataset": "A String", # The dataset for the current project where various workflow
1128 # related tables are stored.
1129 #
1130 # The supported resource type is:
1131 #
1132 # Google BigQuery:
1133 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001134 "experiments": [ # The list of experiments to enable.
1135 "A String",
1136 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -07001137 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001138 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1139 # options are passed through the service and are used to recreate the
1140 # SDK pipeline options on the worker in a language agnostic and platform
1141 # independent way.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001142 "a_key": "", # Properties of the object.
1143 },
1144 "userAgent": { # A description of the process that generated the request.
1145 "a_key": "", # Properties of the object.
1146 },
Dan O'Mearadd494642020-05-01 07:42:23 -07001147 "workerZone": "A String", # The Compute Engine zone
1148 # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
1149 # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
1150 # with worker_region. If neither worker_region nor worker_zone is specified,
1151 # a zone in the control plane's region is chosen based on available capacity.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001152 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1153 # specified in order for the job to have workers.
1154 { # Describes one particular pool of Cloud Dataflow workers to be
1155 # instantiated by the Cloud Dataflow service in order to perform the
1156 # computations required by a job. Note that a workflow job may use
1157 # multiple pools, in order to match the various computational
1158 # requirements of the various stages of the job.
Dan O'Mearadd494642020-05-01 07:42:23 -07001159 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1160 # harness, residing in Google Container Registry.
1161 #
1162 # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
1163 "ipConfiguration": "A String", # Configuration for VM IPs.
1164 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1165 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1166 "algorithm": "A String", # The algorithm to use for autoscaling.
1167 },
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001168 "diskSourceImage": "A String", # Fully qualified source image for disks.
Dan O'Mearadd494642020-05-01 07:42:23 -07001169 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1170 # the service will use the network "default".
1171 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1172 # will attempt to choose a reasonable default.
1173 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1174 "a_key": "A String",
1175 },
1176 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1177 # service will attempt to choose a reasonable default.
1178 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1179 # Compute Engine API.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001180 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1181 # using the standard Dataflow task runner. Users should ignore
1182 # this field.
1183 "workflowFileName": "A String", # The file to store the workflow in.
1184 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1185 # will not be uploaded.
1186 #
1187 # The supported resource type is:
1188 #
1189 # Google Cloud Storage:
1190 # storage.googleapis.com/{bucket}/{object}
1191 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001192 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
Dan O'Mearadd494642020-05-01 07:42:23 -07001193 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1194 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1195 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1196 "vmId": "A String", # The ID string of the VM.
1197 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1198 # taskrunner; e.g. "wheel".
1199 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1200 # taskrunner; e.g. "root".
1201 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1202 # access the Cloud Dataflow API.
1203 "A String",
1204 ],
1205 "languageHint": "A String", # The suggested backend language.
1206 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1207 # console.
1208 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1209 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001210 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1211 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1212 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1213 # "shuffle/v1beta1".
1214 "workerId": "A String", # The ID of the worker running this pipeline.
1215 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1216 #
1217 # When workers access Google Cloud APIs, they logically do so via
1218 # relative URLs. If this field is specified, it supplies the base
1219 # URL to use for resolving these relative URLs. The normative
1220 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1221 # Locators".
1222 #
1223 # If not specified, the default value is "http://www.googleapis.com/"
1224 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1225 # "dataflow/v1b3/projects".
1226 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1227 # storage.
1228 #
1229 # The supported resource type is:
1230 #
1231 # Google Cloud Storage:
1232 #
1233 # storage.googleapis.com/{bucket}/{object}
1234 # bucket.storage.googleapis.com/{object}
1235 },
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001236 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001237 "harnessCommand": "A String", # The command to launch the worker harness.
1238 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1239 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001240 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001241 # The supported resource type is:
1242 #
1243 # Google Cloud Storage:
1244 # storage.googleapis.com/{bucket}/{object}
1245 # bucket.storage.googleapis.com/{object}
Dan O'Mearadd494642020-05-01 07:42:23 -07001246 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1247 #
1248 # When workers access Google Cloud APIs, they logically do so via
1249 # relative URLs. If this field is specified, it supplies the base
1250 # URL to use for resolving these relative URLs. The normative
1251 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1252 # Locators".
1253 #
1254 # If not specified, the default value is "http://www.googleapis.com/"
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001255 },
Dan O'Mearadd494642020-05-01 07:42:23 -07001256 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1257 # service will choose a number of threads (according to the number of cores
1258 # on the selected machine type for batch, or 1 by convention for streaming).
1259 "poolArgs": { # Extra arguments for this worker pool.
1260 "a_key": "", # Properties of the object. Contains field @type with type URL.
1261 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001262 "packages": [ # Packages to be installed on workers.
1263 { # The packages that must be installed in order for a worker to run the
1264 # steps of the Cloud Dataflow job that will be assigned to its worker
1265 # pool.
1266 #
1267 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1268 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1269 # might use this to install jars containing the user's code and all of the
1270 # various dependencies (libraries, data files, etc.) required in order
1271 # for that code to run.
1272 "location": "A String", # The resource to read the package from. The supported resource type is:
1273 #
1274 # Google Cloud Storage:
1275 #
1276 # storage.googleapis.com/{bucket}
1277 # bucket.storage.googleapis.com/
1278 "name": "A String", # The name of the package.
1279 },
1280 ],
Dan O'Mearadd494642020-05-01 07:42:23 -07001281 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1282 # select a default set of packages which are useful to worker
1283 # harnesses written in a particular language.
1284 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1285 # are supported.
1286 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001287 # attempt to choose a reasonable default.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001288 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1289 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1290 # `TEARDOWN_NEVER`.
1291 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1292 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1293 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1294 # down.
1295 #
1296 # If the workers are not torn down by the service, they will
1297 # continue to run and use Google Compute Engine VM resources in the
1298 # user's project until they are explicitly terminated by the user.
1299 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1300 # policy except for small, manually supervised test jobs.
1301 #
1302 # If unknown or unspecified, the service will attempt to choose a reasonable
1303 # default.
Dan O'Mearadd494642020-05-01 07:42:23 -07001304 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1305 # attempt to choose a reasonable default.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001306 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1307 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001308 # attempt to choose a reasonable default.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001309 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1310 # the form "regions/REGION/subnetworks/SUBNETWORK".
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001311 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1312 { # Describes the data disk used by a workflow job.
1313 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1314 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1315 # attempt to choose a reasonable default.
1316 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1317 # must be a disk type appropriate to the project and zone in which
1318 # the workers will run. If unknown or unspecified, the service
1319 # will attempt to choose a reasonable default.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001320 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001321 # For example, the standard persistent disk type is a resource name
1322 # typically ending in "pd-standard". If SSD persistent disks are
1323 # available, the resource name typically ends with "pd-ssd". The
1324 # actual valid values are defined by the Google Compute Engine API,
1325 # not by the Cloud Dataflow API; consult the Google Compute Engine
1326 # documentation for more information about determining the set of
1327 # available disk types for a particular project and zone.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001328 #
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001329 # Google Compute Engine Disk types are local to a particular
1330 # project in a particular zone, and so the resource name will
1331 # typically look something like this:
1332 #
1333 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04001334 },
1335 ],
Dan O'Mearadd494642020-05-01 07:42:23 -07001336 "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
1337 # only be set in the Fn API path. For non-cross-language pipelines this
1338 # should have only one entry. Cross-language pipelines will have two or more
1339 # entries.
1340 { # Defines an SDK harness container for executing Dataflow pipelines.
1341 "containerImage": "A String", # A docker container image that resides in Google Container Registry.
1342 "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
1343 # container instance with this image. If false (or unset) recommends using
1344 # more than one core per SDK container instance with this image for
1345 # efficiency. Note that Dataflow service may choose to override this property
1346 # if needed.
1347 },
1348 ],
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001349 },
1350 ],
Dan O'Mearadd494642020-05-01 07:42:23 -07001351 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1352 # unspecified, the service will attempt to choose a reasonable
1353 # default. This should be in the form of the API service name,
1354 # e.g. "compute.googleapis.com".
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001355 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1356 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1357 # this resource prefix, where {JOBNAME} is the value of the
1358 # job_name field. The resulting bucket and object prefix is used
1359 # as the prefix of the resources used to store temporary data
1360 # needed during the job execution. NOTE: This will override the
1361 # value in taskrunner_settings.
1362 # The supported resource type is:
1363 #
1364 # Google Cloud Storage:
1365 #
1366 # storage.googleapis.com/{bucket}/{object}
1367 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001368 },
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001369 "location": "A String", # The [regional endpoint]
1370 # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1371 # contains this job.
1372 "tempFiles": [ # A set of files the system should be aware of that are used
1373 # for temporary storage. These temporary files will be
1374 # removed on job completion.
1375 # No duplicates are allowed.
1376 # No file patterns are supported.
1377 #
1378 # The supported files are:
1379 #
1380 # Google Cloud Storage:
1381 #
1382 # storage.googleapis.com/{bucket}/{object}
1383 # bucket.storage.googleapis.com/{object}
1384 "A String",
1385 ],
1386 "type": "A String", # The type of Cloud Dataflow job.
1387 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1388 # If this field is set, the service will ensure its uniqueness.
1389 # The request to create a job will fail if the service has knowledge of a
1390 # previously submitted job with the same client's ID and job name.
1391 # The caller may use this field to ensure idempotence of job
1392 # creation across retried attempts to create a job.
1393 # By default, the field is empty and, in that case, the service ignores it.
1394 "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1395 # snapshot.
1396 "stepsLocation": "A String", # The GCS location where the steps are stored.
1397 "currentStateTime": "A String", # The timestamp associated with the current state.
1398 "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1399 # Flexible resource scheduling jobs are started with some delay after job
1400 # creation, so start_time is unset before start and is updated when the
1401 # job is started by the Cloud Dataflow service. For other jobs, start_time
1402 # always equals to create_time and is immutable and set by the Cloud Dataflow
1403 # service.
1404 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1405 # Cloud Dataflow service.
1406 "requestedState": "A String", # The job's requested state.
1407 #
1408 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1409 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1410 # also be used to directly set a job's requested state to
1411 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1412 # job if it has not already reached a terminal state.
1413 "name": "A String", # The user-specified Cloud Dataflow job name.
1414 #
1415 # Only one Job with a given name may exist in a project at any
1416 # given time. If a caller attempts to create a Job with the same
1417 # name as an already-existing Job, the attempt returns the
1418 # existing Job.
1419 #
1420 # The name must match the regular expression
1421 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1422 "steps": [ # Exactly one of step or steps_location should be specified.
1423 #
1424 # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001425 { # Defines a particular step within a Cloud Dataflow job.
1426 #
1427 # A job consists of multiple steps, each of which performs some
1428 # specific operation as part of the overall job. Data is typically
1429 # passed from one step to another as part of the job.
1430 #
1431 # Here's an example of a sequence of steps which together implement a
1432 # Map-Reduce job:
1433 #
1434 # * Read a collection of data from some source, parsing the
1435 # collection's elements.
1436 #
1437 # * Validate the elements.
1438 #
1439 # * Apply a user-defined function to map each element to some value
1440 # and extract an element-specific key value.
1441 #
1442 # * Group elements with the same key into a single element with
1443 # that key, transforming a multiply-keyed collection into a
1444 # uniquely-keyed collection.
1445 #
1446 # * Write the elements out to some data sink.
1447 #
1448 # Note that the Cloud Dataflow service may be used to run many different
1449 # types of jobs, not just Map-Reduce.
1450 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Dan O'Mearadd494642020-05-01 07:42:23 -07001451 "name": "A String", # The name that identifies the step. This must be unique for each
1452 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001453 "properties": { # Named properties associated with the step. Each kind of
1454 # predefined step has its own required set of properties.
1455 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001456 "a_key": "", # Properties of the object.
1457 },
1458 },
1459 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07001460 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1461 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001462 #
Thomas Coffee2f245372017-03-27 10:39:26 -07001463 # When sending a `CreateJobRequest`, you can update a job by specifying it
1464 # here. The job named here is stopped, and its intermediate state is
1465 # transferred to this job.
Bu Sun Kim715bd7f2019-06-14 16:50:42 -07001466 "currentState": "A String", # The current state of the job.
1467 #
1468 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1469 # specified.
1470 #
1471 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1472 # terminal state. After a job has reached a terminal state, no
1473 # further state updates may be made.
1474 #
1475 # This field may be mutated by the Cloud Dataflow service;
1476 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001477 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1478 # isn't contained in the submitted job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001479 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001480 "a_key": { # Contains information about how a particular
1481 # google.dataflow.v1beta3.Step will be executed.
1482 "stepName": [ # The steps associated with the execution stage.
1483 # Note that stages may have several steps, and that a given step
1484 # might be run by more than one stage.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001485 "A String",
1486 ],
1487 },
1488 },
1489 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001490 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001491 }</pre>
1492</div>
1493
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001494</body></html>