blob: 197f0c99427d94ca0df948d6a176d409d7942e76 [file] [log] [blame]
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13}
14
15body {
16 font-size: 13px;
17 padding: 1em;
18}
19
20h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23}
24
25h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28}
29
30h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34}
35
36pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42 margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54 margin-top: 0.5em;
55}
56
57.firstline {
58 margin-left: 2em;
59}
60
61.method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66}
67
68.details {
69 font-weight: bold;
70 font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78 <code><a href="#create">create(projectId, body, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040079<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
81 <code><a href="#get">get(projectId, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
84 <code><a href="#launch">launch(projectId, body, dryRun=None, gcsPath=None, x__xgafv=None)</a></code></p>
85<p class="firstline">Launch a template.</p>
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070086<h3>Method Details</h3>
87<div class="method">
88 <code class="details" id="create">create(projectId, body, x__xgafv=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040089 <pre>Creates a Cloud Dataflow job from a template.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070090
91Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040092 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070093 body: object, The request body. (required)
94 The object takes the form of:
95
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040096{ # A request to create a Cloud Dataflow job from a template.
97 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
98 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
99 # Use with caution.
100 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
101 # Must be a valid Cloud Storage URL, beginning with `gs://`.
102 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
103 "zone": "A String", # The Compute Engine [availability zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
104 # for launching worker instances to run your pipeline.
105 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
106 # available to your pipeline during execution, from 1 to 1000.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800107 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400108 "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
109 # create the job.
110 # Must be a valid Cloud Storage URL, beginning with `gs://`.
111 "parameters": { # The runtime parameters to pass to the job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700112 "a_key": "A String",
113 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400114 "jobName": "A String", # Required. The job name to use for the created job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700115 }
116
117 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400118 Allowed values
119 1 - v1 error format
120 2 - v2 error format
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700121
122Returns:
123 An object of the form:
124
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400125 { # Defines a job to be run by the Cloud Dataflow service.
126 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
127 # If this field is set, the service will ensure its uniqueness.
128 # The request to create a job will fail if the service has knowledge of a
129 # previously submitted job with the same client's ID and job name.
130 # The caller may use this field to ensure idempotence of job
131 # creation across retried attempts to create a job.
132 # By default, the field is empty and, in that case, the service ignores it.
133 "requestedState": "A String", # The job's requested state.
134 #
135 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
136 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
137 # also be used to directly set a job's requested state to
138 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
139 # job if it has not already reached a terminal state.
140 "name": "A String", # The user-specified Cloud Dataflow job name.
141 #
142 # Only one Job with a given name may exist in a project at any
143 # given time. If a caller attempts to create a Job with the same
144 # name as an already-existing Job, the attempt returns the
145 # existing Job.
146 #
147 # The name must match the regular expression
148 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
149 "currentStateTime": "A String", # The timestamp associated with the current state.
150 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
151 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
152 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
153 "labels": { # User-defined labels for this job.
154 #
155 # The labels map can contain no more than 64 entries. Entries of the labels
156 # map are UTF8 strings that comply with the following restrictions:
157 #
158 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
159 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
160 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
161 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700162 "a_key": "A String",
163 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400164 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
165 # corresponding name prefixes of the new job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700166 "a_key": "A String",
167 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700168 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
169 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400170 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
171 "version": { # A structure describing which components and their versions of the service
172 # are required in order to run the job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700173 "a_key": "", # Properties of the object.
174 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400175 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
176 # storage. The system will append the suffix "/temp-{JOBNAME}" to
177 # this resource prefix, where {JOBNAME} is the value of the
178 # job_name field. The resulting bucket and object prefix is used
179 # as the prefix of the resources used to store temporary data
180 # needed during the job execution. NOTE: This will override the
181 # value in taskrunner_settings.
182 # The supported resource type is:
183 #
184 # Google Cloud Storage:
185 #
186 # storage.googleapis.com/{bucket}/{object}
187 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700188 "internalExperiments": { # Experimental settings.
189 "a_key": "", # Properties of the object. Contains field @type with type URL.
190 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400191 "dataset": "A String", # The dataset for the current project where various workflow
192 # related tables are stored.
193 #
194 # The supported resource type is:
195 #
196 # Google BigQuery:
197 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700198 "experiments": [ # The list of experiments to enable.
199 "A String",
200 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -0700201 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400202 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
203 # options are passed through the service and are used to recreate the
204 # SDK pipeline options on the worker in a language agnostic and platform
205 # independent way.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700206 "a_key": "", # Properties of the object.
207 },
208 "userAgent": { # A description of the process that generated the request.
209 "a_key": "", # Properties of the object.
210 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400211 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
212 # unspecified, the service will attempt to choose a reasonable
213 # default. This should be in the form of the API service name,
214 # e.g. "compute.googleapis.com".
215 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
216 # specified in order for the job to have workers.
217 { # Describes one particular pool of Cloud Dataflow workers to be
218 # instantiated by the Cloud Dataflow service in order to perform the
219 # computations required by a job. Note that a workflow job may use
220 # multiple pools, in order to match the various computational
221 # requirements of the various stages of the job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700222 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400223 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
224 # using the standard Dataflow task runner. Users should ignore
225 # this field.
226 "workflowFileName": "A String", # The file to store the workflow in.
227 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
228 # will not be uploaded.
229 #
230 # The supported resource type is:
231 #
232 # Google Cloud Storage:
233 # storage.googleapis.com/{bucket}/{object}
234 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400235 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
236 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700237 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
238 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
239 "vmId": "A String", # The ID string of the VM.
240 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
241 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400242 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
243 # access the Cloud Dataflow API.
244 "A String",
245 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400246 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
247 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
248 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
249 # "shuffle/v1beta1".
250 "workerId": "A String", # The ID of the worker running this pipeline.
251 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
252 #
253 # When workers access Google Cloud APIs, they logically do so via
254 # relative URLs. If this field is specified, it supplies the base
255 # URL to use for resolving these relative URLs. The normative
256 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
257 # Locators".
258 #
259 # If not specified, the default value is "http://www.googleapis.com/"
260 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
261 # "dataflow/v1b3/projects".
262 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
263 # storage.
264 #
265 # The supported resource type is:
266 #
267 # Google Cloud Storage:
268 #
269 # storage.googleapis.com/{bucket}/{object}
270 # bucket.storage.googleapis.com/{object}
271 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700272 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
273 # taskrunner; e.g. "wheel".
274 "languageHint": "A String", # The suggested backend language.
275 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
276 # console.
277 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
278 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400279 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400280 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
281 #
282 # When workers access Google Cloud APIs, they logically do so via
283 # relative URLs. If this field is specified, it supplies the base
284 # URL to use for resolving these relative URLs. The normative
285 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
286 # Locators".
287 #
288 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700289 "harnessCommand": "A String", # The command to launch the worker harness.
290 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
291 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400292 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700293 # The supported resource type is:
294 #
295 # Google Cloud Storage:
296 # storage.googleapis.com/{bucket}/{object}
297 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700298 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700299 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
300 # are supported.
301 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
302 # service will attempt to choose a reasonable default.
303 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
304 # the service will use the network "default".
305 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
306 # will attempt to choose a reasonable default.
307 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
308 # attempt to choose a reasonable default.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700309 "dataDisks": [ # Data disks that are used by a VM in this workflow.
310 { # Describes the data disk used by a workflow job.
311 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400312 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
313 # attempt to choose a reasonable default.
314 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
315 # must be a disk type appropriate to the project and zone in which
316 # the workers will run. If unknown or unspecified, the service
317 # will attempt to choose a reasonable default.
318 #
319 # For example, the standard persistent disk type is a resource name
320 # typically ending in "pd-standard". If SSD persistent disks are
321 # available, the resource name typically ends with "pd-ssd". The
322 # actual valid values are defined by the Google Compute Engine API,
323 # not by the Cloud Dataflow API; consult the Google Compute Engine
324 # documentation for more information about determining the set of
325 # available disk types for a particular project and zone.
326 #
327 # Google Compute Engine Disk types are local to a particular
328 # project in a particular zone, and so the resource name will
329 # typically look something like this:
330 #
331 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700332 },
333 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700334 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
335 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
336 # `TEARDOWN_NEVER`.
337 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
338 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
339 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
340 # down.
341 #
342 # If the workers are not torn down by the service, they will
343 # continue to run and use Google Compute Engine VM resources in the
344 # user's project until they are explicitly terminated by the user.
345 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
346 # policy except for small, manually supervised test jobs.
347 #
348 # If unknown or unspecified, the service will attempt to choose a reasonable
349 # default.
350 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
351 # Compute Engine API.
352 "ipConfiguration": "A String", # Configuration for VM IPs.
353 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
354 # service will choose a number of threads (according to the number of cores
355 # on the selected machine type for batch, or 1 by convention for streaming).
356 "poolArgs": { # Extra arguments for this worker pool.
357 "a_key": "", # Properties of the object. Contains field @type with type URL.
358 },
359 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
360 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400361 # attempt to choose a reasonable default.
362 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
363 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700364 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
365 # the form "regions/REGION/subnetworks/SUBNETWORK".
366 "packages": [ # Packages to be installed on workers.
367 { # The packages that must be installed in order for a worker to run the
368 # steps of the Cloud Dataflow job that will be assigned to its worker
369 # pool.
370 #
371 # This is the mechanism by which the Cloud Dataflow SDK causes code to
372 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
373 # might use this to install jars containing the user's code and all of the
374 # various dependencies (libraries, data files, etc.) required in order
375 # for that code to run.
376 "location": "A String", # The resource to read the package from. The supported resource type is:
377 #
378 # Google Cloud Storage:
379 #
380 # storage.googleapis.com/{bucket}
381 # bucket.storage.googleapis.com/
382 "name": "A String", # The name of the package.
383 },
384 ],
385 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
386 "algorithm": "A String", # The algorithm to use for autoscaling.
387 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
388 },
389 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
390 # select a default set of packages which are useful to worker
391 # harnesses written in a particular language.
392 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
393 # attempt to choose a reasonable default.
394 "metadata": { # Metadata to set on the Google Compute Engine VMs.
395 "a_key": "A String",
396 },
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700397 },
398 ],
399 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700400 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
401 # of the job it replaced.
402 #
403 # When sending a `CreateJobRequest`, you can update a job by specifying it
404 # here. The job named here is stopped, and its intermediate state is
405 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400406 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
407 # A description of the user pipeline and stages through which it is executed.
408 # Created by Cloud Dataflow service. Only retrieved with
409 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
410 # form. This data is provided by the Dataflow service for ease of visualizing
411 # the pipeline and interpreting Dataflow provided metrics.
412 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
413 { # Description of the type, names/ids, and input/outputs for a transform.
414 "kind": "A String", # Type of transform.
415 "name": "A String", # User provided name for this transform instance.
416 "inputCollectionName": [ # User names for all collection inputs to this transform.
417 "A String",
418 ],
419 "displayData": [ # Transform-specific display data.
420 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400421 "shortStrValue": "A String", # A possible additional shorter value to display.
422 # For example a java_class_name_value of com.mypackage.MyDoFn
423 # will be stored with MyDoFn as the short_str_value and
424 # com.mypackage.MyDoFn as the java_class_name value.
425 # short_str_value can be displayed and java_class_name_value
426 # will be displayed as a tooltip.
427 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700428 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400429 "url": "A String", # An optional full URL.
430 "floatValue": 3.14, # Contains value if the data is of float type.
431 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
432 # language namespace (i.e. python module) which defines the display data.
433 # This allows a dax monitoring system to specially handle the data
434 # and perform custom rendering.
435 "javaClassValue": "A String", # Contains value if the data is of java class type.
436 "label": "A String", # An optional label to display in a dax UI for the element.
437 "boolValue": True or False, # Contains value if the data is of a boolean type.
438 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700439 "key": "A String", # The key identifying the display data.
440 # This is intended to be used as a label for the display data
441 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400442 "int64Value": "A String", # Contains value if the data is of int64 type.
443 },
444 ],
445 "outputCollectionName": [ # User names for all collection outputs to this transform.
446 "A String",
447 ],
448 "id": "A String", # SDK generated id of this transform instance.
449 },
450 ],
451 "displayData": [ # Pipeline level display data.
452 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400453 "shortStrValue": "A String", # A possible additional shorter value to display.
454 # For example a java_class_name_value of com.mypackage.MyDoFn
455 # will be stored with MyDoFn as the short_str_value and
456 # com.mypackage.MyDoFn as the java_class_name value.
457 # short_str_value can be displayed and java_class_name_value
458 # will be displayed as a tooltip.
459 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700460 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400461 "url": "A String", # An optional full URL.
462 "floatValue": 3.14, # Contains value if the data is of float type.
463 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
464 # language namespace (i.e. python module) which defines the display data.
465 # This allows a dax monitoring system to specially handle the data
466 # and perform custom rendering.
467 "javaClassValue": "A String", # Contains value if the data is of java class type.
468 "label": "A String", # An optional label to display in a dax UI for the element.
469 "boolValue": True or False, # Contains value if the data is of a boolean type.
470 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700471 "key": "A String", # The key identifying the display data.
472 # This is intended to be used as a label for the display data
473 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400474 "int64Value": "A String", # Contains value if the data is of int64 type.
475 },
476 ],
477 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
478 { # Description of the composing transforms, names/ids, and input/outputs of a
479 # stage of execution. Some composing transforms and sources may have been
480 # generated by the Dataflow service during execution planning.
481 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
482 { # Description of an interstitial value between transforms in an execution
483 # stage.
484 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
485 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
486 # source is most closely associated.
487 "name": "A String", # Dataflow service generated name for this source.
488 },
489 ],
490 "kind": "A String", # Type of transform this stage is executing.
491 "name": "A String", # Dataflow service generated name for this stage.
492 "outputSource": [ # Output sources for this stage.
493 { # Description of an input or output of an execution stage.
494 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700495 "sizeBytes": "A String", # Size of the source, if measurable.
496 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400497 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400499 },
500 ],
501 "inputSource": [ # Input sources for this stage.
502 { # Description of an input or output of an execution stage.
503 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700504 "sizeBytes": "A String", # Size of the source, if measurable.
505 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400506 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
507 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400508 },
509 ],
510 "componentTransform": [ # Transforms that comprise this execution stage.
511 { # Description of a transform executed as part of an execution stage.
512 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
513 "originalTransform": "A String", # User name for the original user transform with which this transform is
514 # most closely associated.
515 "name": "A String", # Dataflow service generated name for this source.
516 },
517 ],
518 "id": "A String", # Dataflow service generated id for this stage.
519 },
520 ],
521 },
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700522 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400523 { # Defines a particular step within a Cloud Dataflow job.
524 #
525 # A job consists of multiple steps, each of which performs some
526 # specific operation as part of the overall job. Data is typically
527 # passed from one step to another as part of the job.
528 #
529 # Here's an example of a sequence of steps which together implement a
530 # Map-Reduce job:
531 #
532 # * Read a collection of data from some source, parsing the
533 # collection's elements.
534 #
535 # * Validate the elements.
536 #
537 # * Apply a user-defined function to map each element to some value
538 # and extract an element-specific key value.
539 #
540 # * Group elements with the same key into a single element with
541 # that key, transforming a multiply-keyed collection into a
542 # uniquely-keyed collection.
543 #
544 # * Write the elements out to some data sink.
545 #
546 # Note that the Cloud Dataflow service may be used to run many different
547 # types of jobs, not just Map-Reduce.
548 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700549 "name": "A String", # The name that identifies the step. This must be unique for each
550 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400551 "properties": { # Named properties associated with the step. Each kind of
552 # predefined step has its own required set of properties.
553 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700554 "a_key": "", # Properties of the object.
555 },
556 },
557 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700558 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400559 "tempFiles": [ # A set of files the system should be aware of that are used
560 # for temporary storage. These temporary files will be
561 # removed on job completion.
562 # No duplicates are allowed.
563 # No file patterns are supported.
564 #
565 # The supported files are:
566 #
567 # Google Cloud Storage:
568 #
569 # storage.googleapis.com/{bucket}/{object}
570 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700571 "A String",
572 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400573 "type": "A String", # The type of Cloud Dataflow job.
574 "id": "A String", # The unique ID of this job.
575 #
576 # This field is set by the Cloud Dataflow service when the Job is
577 # created, and is immutable for the life of the job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700578 "currentState": "A String", # The current state of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400579 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700580 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
581 # specified.
582 #
583 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
584 # terminal state. After a job has reached a terminal state, no
585 # further state updates may be made.
586 #
587 # This field may be mutated by the Cloud Dataflow service;
588 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400589 "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be executed that
590 # isn't contained in the submitted job.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700591 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400592 "a_key": { # Contains information about how a particular
593 # google.dataflow.v1beta3.Step will be executed.
594 "stepName": [ # The steps associated with the execution stage.
595 # Note that stages may have several steps, and that a given step
596 # might be run by more than one stage.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700597 "A String",
598 ],
599 },
600 },
601 },
602 }</pre>
603</div>
604
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400605<div class="method">
606 <code class="details" id="get">get(projectId, gcsPath=None, x__xgafv=None, view=None)</code>
607 <pre>Get the template associated with a template.
608
609Args:
610 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
611 gcsPath: string, Required. A Cloud Storage path to the template from which to
612create the job.
613Must be a valid Cloud Storage URL, beginning with `gs://`.
614 x__xgafv: string, V1 error format.
615 Allowed values
616 1 - v1 error format
617 2 - v2 error format
618 view: string, The view to retrieve. Defaults to METADATA_ONLY.
619
620Returns:
621 An object of the form:
622
623 { # The response to a GetTemplate request.
624 "status": { # The status of the get template request. Any problems with the
625 # request will be indicated in the error_details. The `Status` type defines a
626 # logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by
627 # [gRPC](https://github.com/grpc). The error model is designed to be:
628 #
629 # - Simple to use and understand for most users
630 # - Flexible enough to meet unexpected needs
631 #
632 # # Overview
633 #
634 # The `Status` message contains three pieces of data: error code, error message,
635 # and error details. The error code should be an enum value of
636 # google.rpc.Code, but it may accept additional error codes if needed. The
637 # error message should be a developer-facing English message that helps
638 # developers *understand* and *resolve* the error. If a localized user-facing
639 # error message is needed, put the localized message in the error details or
640 # localize it in the client. The optional error details may contain arbitrary
641 # information about the error. There is a predefined set of error detail types
642 # in the package `google.rpc` which can be used for common error conditions.
643 #
644 # # Language mapping
645 #
646 # The `Status` message is the logical representation of the error model, but it
647 # is not necessarily the actual wire format. When the `Status` message is
648 # exposed in different client libraries and different wire protocols, it can be
649 # mapped differently. For example, it will likely be mapped to some exceptions
650 # in Java, but more likely mapped to some error codes in C.
651 #
652 # # Other uses
653 #
654 # The error model and the `Status` message can be used in a variety of
655 # environments, either with or without APIs, to provide a
656 # consistent developer experience across different environments.
657 #
658 # Example uses of this error model include:
659 #
660 # - Partial errors. If a service needs to return partial errors to the client,
661 # it may embed the `Status` in the normal response to indicate the partial
662 # errors.
663 #
664 # - Workflow errors. A typical workflow has multiple steps. Each step may
665 # have a `Status` message for error reporting purpose.
666 #
667 # - Batch operations. If a client uses batch request and batch response, the
668 # `Status` message should be used directly inside batch response, one for
669 # each error sub-response.
670 #
671 # - Asynchronous operations. If an API call embeds asynchronous operation
672 # results in its response, the status of those operations should be
673 # represented directly using the `Status` message.
674 #
675 # - Logging. If some API errors are stored in logs, the message `Status` could
676 # be used directly after any stripping needed for security/privacy reasons.
677 "message": "A String", # A developer-facing error message, which should be in English. Any
678 # user-facing error message should be localized and sent in the
679 # google.rpc.Status.details field, or localized by the client.
680 "code": 42, # The status code, which should be an enum value of google.rpc.Code.
681 "details": [ # A list of messages that carry the error details. There will be a
682 # common set of message types for APIs to use.
683 {
684 "a_key": "", # Properties of the object. Contains field @type with type URL.
685 },
686 ],
687 },
688 "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
689 # parameters, etc.
690 "bypassTempDirValidation": True or False, # If true, will bypass the validation that the temp directory is
691 # writable. This should only be used with templates for pipelines
692 # that are guaranteed not to need to write to the temp directory,
693 # which is subject to change based on the optimizer.
694 "name": "A String", # Required. The name of the template.
695 "parameters": [ # The parameters for the template.
696 { # Metadata for a specific parameter.
697 "regexes": [ # Optional. Regexes that the parameter must match.
698 "A String",
699 ],
700 "helpText": "A String", # Required. The help text to display for the parameter.
701 "name": "A String", # Required. The name of the parameter.
702 "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
703 "label": "A String", # Required. The label to display for the parameter.
704 },
705 ],
706 "description": "A String", # Optional. A description of the template.
707 },
708 }</pre>
709</div>
710
711<div class="method">
712 <code class="details" id="launch">launch(projectId, body, dryRun=None, gcsPath=None, x__xgafv=None)</code>
713 <pre>Launch a template.
714
715Args:
716 projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
717 body: object, The request body. (required)
718 The object takes the form of:
719
720{ # Parameters to provide to the template being launched.
721 "environment": { # The environment values to set at runtime. # The runtime environment for the job.
722 "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
723 # Use with caution.
724 "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
725 # Must be a valid Cloud Storage URL, beginning with `gs://`.
726 "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
727 "zone": "A String", # The Compute Engine [availability zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
728 # for launching worker instances to run your pipeline.
729 "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
730 # available to your pipeline during execution, from 1 to 1000.
731 },
732 "parameters": { # The runtime parameters to pass to the job.
733 "a_key": "A String",
734 },
735 "jobName": "A String", # Required. The job name to use for the created job.
736 }
737
738 dryRun: boolean, Whether or not the job should actually be executed after
739validating parameters. Defaults to false. Validation errors do
740not cause the HTTP request to fail if true.
741 gcsPath: string, Required. A Cloud Storage path to the template from which to create
742the job.
743Must be a valid Cloud Storage URL, beginning with `gs://`.
744 x__xgafv: string, V1 error format.
745 Allowed values
746 1 - v1 error format
747 2 - v2 error format
748
749Returns:
750 An object of the form:
751
752 { # Response to the request to launch a template.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400753 "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
754 # the job was successfully launched.
755 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
756 # If this field is set, the service will ensure its uniqueness.
757 # The request to create a job will fail if the service has knowledge of a
758 # previously submitted job with the same client's ID and job name.
759 # The caller may use this field to ensure idempotence of job
760 # creation across retried attempts to create a job.
761 # By default, the field is empty and, in that case, the service ignores it.
762 "requestedState": "A String", # The job's requested state.
763 #
764 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
765 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
766 # also be used to directly set a job's requested state to
767 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
768 # job if it has not already reached a terminal state.
769 "name": "A String", # The user-specified Cloud Dataflow job name.
770 #
771 # Only one Job with a given name may exist in a project at any
772 # given time. If a caller attempts to create a Job with the same
773 # name as an already-existing Job, the attempt returns the
774 # existing Job.
775 #
776 # The name must match the regular expression
777 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
778 "currentStateTime": "A String", # The timestamp associated with the current state.
779 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
780 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
781 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
782 "labels": { # User-defined labels for this job.
783 #
784 # The labels map can contain no more than 64 entries. Entries of the labels
785 # map are UTF8 strings that comply with the following restrictions:
786 #
787 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
788 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
789 # * Both keys and values are additionally constrained to be <= 128 bytes in
790 # size.
791 "a_key": "A String",
792 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400793 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
794 # corresponding name prefixes of the new job.
795 "a_key": "A String",
796 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700797 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
798 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400799 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
800 "version": { # A structure describing which components and their versions of the service
801 # are required in order to run the job.
802 "a_key": "", # Properties of the object.
803 },
804 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
805 # storage. The system will append the suffix "/temp-{JOBNAME}" to
806 # this resource prefix, where {JOBNAME} is the value of the
807 # job_name field. The resulting bucket and object prefix is used
808 # as the prefix of the resources used to store temporary data
809 # needed during the job execution. NOTE: This will override the
810 # value in taskrunner_settings.
811 # The supported resource type is:
812 #
813 # Google Cloud Storage:
814 #
815 # storage.googleapis.com/{bucket}/{object}
816 # bucket.storage.googleapis.com/{object}
817 "internalExperiments": { # Experimental settings.
818 "a_key": "", # Properties of the object. Contains field @type with type URL.
819 },
820 "dataset": "A String", # The dataset for the current project where various workflow
821 # related tables are stored.
822 #
823 # The supported resource type is:
824 #
825 # Google BigQuery:
826 # bigquery.googleapis.com/{dataset}
827 "experiments": [ # The list of experiments to enable.
828 "A String",
829 ],
830 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
831 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
832 # options are passed through the service and are used to recreate the
833 # SDK pipeline options on the worker in a language agnostic and platform
834 # independent way.
835 "a_key": "", # Properties of the object.
836 },
837 "userAgent": { # A description of the process that generated the request.
838 "a_key": "", # Properties of the object.
839 },
840 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
841 # unspecified, the service will attempt to choose a reasonable
842 # default. This should be in the form of the API service name,
843 # e.g. "compute.googleapis.com".
844 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
845 # specified in order for the job to have workers.
846 { # Describes one particular pool of Cloud Dataflow workers to be
847 # instantiated by the Cloud Dataflow service in order to perform the
848 # computations required by a job. Note that a workflow job may use
849 # multiple pools, in order to match the various computational
850 # requirements of the various stages of the job.
851 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400852 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
853 # using the standard Dataflow task runner. Users should ignore
854 # this field.
855 "workflowFileName": "A String", # The file to store the workflow in.
856 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
857 # will not be uploaded.
858 #
859 # The supported resource type is:
860 #
861 # Google Cloud Storage:
862 # storage.googleapis.com/{bucket}/{object}
863 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400864 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
865 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700866 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
867 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
868 "vmId": "A String", # The ID string of the VM.
869 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
870 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400871 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
872 # access the Cloud Dataflow API.
873 "A String",
874 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400875 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
876 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
877 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
878 # "shuffle/v1beta1".
879 "workerId": "A String", # The ID of the worker running this pipeline.
880 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
881 #
882 # When workers access Google Cloud APIs, they logically do so via
883 # relative URLs. If this field is specified, it supplies the base
884 # URL to use for resolving these relative URLs. The normative
885 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
886 # Locators".
887 #
888 # If not specified, the default value is "http://www.googleapis.com/"
889 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
890 # "dataflow/v1b3/projects".
891 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
892 # storage.
893 #
894 # The supported resource type is:
895 #
896 # Google Cloud Storage:
897 #
898 # storage.googleapis.com/{bucket}/{object}
899 # bucket.storage.googleapis.com/{object}
900 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700901 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
902 # taskrunner; e.g. "wheel".
903 "languageHint": "A String", # The suggested backend language.
904 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
905 # console.
906 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
907 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400908 "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400909 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
910 #
911 # When workers access Google Cloud APIs, they logically do so via
912 # relative URLs. If this field is specified, it supplies the base
913 # URL to use for resolving these relative URLs. The normative
914 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
915 # Locators".
916 #
917 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700918 "harnessCommand": "A String", # The command to launch the worker harness.
919 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
920 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400921 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700922 # The supported resource type is:
923 #
924 # Google Cloud Storage:
925 # storage.googleapis.com/{bucket}/{object}
926 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400927 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700928 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
929 # are supported.
930 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
931 # service will attempt to choose a reasonable default.
932 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
933 # the service will use the network "default".
934 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
935 # will attempt to choose a reasonable default.
936 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
937 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400938 "dataDisks": [ # Data disks that are used by a VM in this workflow.
939 { # Describes the data disk used by a workflow job.
940 "mountPoint": "A String", # Directory in a VM where disk is mounted.
941 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
942 # attempt to choose a reasonable default.
943 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
944 # must be a disk type appropriate to the project and zone in which
945 # the workers will run. If unknown or unspecified, the service
946 # will attempt to choose a reasonable default.
947 #
948 # For example, the standard persistent disk type is a resource name
949 # typically ending in "pd-standard". If SSD persistent disks are
950 # available, the resource name typically ends with "pd-ssd". The
951 # actual valid values are defined by the Google Compute Engine API,
952 # not by the Cloud Dataflow API; consult the Google Compute Engine
953 # documentation for more information about determining the set of
954 # available disk types for a particular project and zone.
955 #
956 # Google Compute Engine Disk types are local to a particular
957 # project in a particular zone, and so the resource name will
958 # typically look something like this:
959 #
960 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
961 },
962 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700963 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
964 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
965 # `TEARDOWN_NEVER`.
966 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
967 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
968 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
969 # down.
970 #
971 # If the workers are not torn down by the service, they will
972 # continue to run and use Google Compute Engine VM resources in the
973 # user's project until they are explicitly terminated by the user.
974 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
975 # policy except for small, manually supervised test jobs.
976 #
977 # If unknown or unspecified, the service will attempt to choose a reasonable
978 # default.
979 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
980 # Compute Engine API.
981 "ipConfiguration": "A String", # Configuration for VM IPs.
982 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
983 # service will choose a number of threads (according to the number of cores
984 # on the selected machine type for batch, or 1 by convention for streaming).
985 "poolArgs": { # Extra arguments for this worker pool.
986 "a_key": "", # Properties of the object. Contains field @type with type URL.
987 },
988 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
989 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400990 # attempt to choose a reasonable default.
991 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
992 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700993 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
994 # the form "regions/REGION/subnetworks/SUBNETWORK".
995 "packages": [ # Packages to be installed on workers.
996 { # The packages that must be installed in order for a worker to run the
997 # steps of the Cloud Dataflow job that will be assigned to its worker
998 # pool.
999 #
1000 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1001 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1002 # might use this to install jars containing the user's code and all of the
1003 # various dependencies (libraries, data files, etc.) required in order
1004 # for that code to run.
1005 "location": "A String", # The resource to read the package from. The supported resource type is:
1006 #
1007 # Google Cloud Storage:
1008 #
1009 # storage.googleapis.com/{bucket}
1010 # bucket.storage.googleapis.com/
1011 "name": "A String", # The name of the package.
1012 },
1013 ],
1014 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1015 "algorithm": "A String", # The algorithm to use for autoscaling.
1016 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1017 },
1018 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1019 # select a default set of packages which are useful to worker
1020 # harnesses written in a particular language.
1021 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1022 # attempt to choose a reasonable default.
1023 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1024 "a_key": "A String",
1025 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001026 },
1027 ],
1028 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001029 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1030 # of the job it replaced.
1031 #
1032 # When sending a `CreateJobRequest`, you can update a job by specifying it
1033 # here. The job named here is stopped, and its intermediate state is
1034 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001035 "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
1036 # A description of the user pipeline and stages through which it is executed.
1037 # Created by Cloud Dataflow service. Only retrieved with
1038 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1039 # A descriptive representation of the submitted pipeline as well as its executed form. This data is provided by the Dataflow service for ease of visualizing
1040 # the pipeline and interpreting Dataflow-provided metrics.
1041 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1042 { # Description of the type, names/ids, and input/outputs for a transform.
1043 "kind": "A String", # Type of transform.
1044 "name": "A String", # User provided name for this transform instance.
1045 "inputCollectionName": [ # User names for all collection inputs to this transform.
1046 "A String",
1047 ],
1048 "displayData": [ # Transform-specific display data.
1049 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001050 "shortStrValue": "A String", # A possible additional shorter value to display.
1051 # For example a java_class_name_value of com.mypackage.MyDoFn
1052 # will be stored with MyDoFn as the short_str_value and
1053 # com.mypackage.MyDoFn as the java_class_name value.
1054 # short_str_value can be displayed and java_class_name_value
1055 # will be displayed as a tooltip.
1056 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001057 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001058 "url": "A String", # An optional full URL.
1059 "floatValue": 3.14, # Contains value if the data is of float type.
1060 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1061 # language namespace (e.g. Python module) which defines the display data.
1062 # This allows a dax monitoring system to specially handle the data
1063 # and perform custom rendering.
1064 "javaClassValue": "A String", # Contains value if the data is of java class type.
1065 "label": "A String", # An optional label to display in a dax UI for the element.
1066 "boolValue": True or False, # Contains value if the data is of a boolean type.
1067 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001068 "key": "A String", # The key identifying the display data.
1069 # This is intended to be used as a label for the display data
1070 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001071 "int64Value": "A String", # Contains value if the data is of int64 type.
1072 },
1073 ],
1074 "outputCollectionName": [ # User names for all collection outputs to this transform.
1075 "A String",
1076 ],
1077 "id": "A String", # SDK generated id of this transform instance.
1078 },
1079 ],
1080 "displayData": [ # Pipeline level display data.
1081 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001082 "shortStrValue": "A String", # A possible additional shorter value to display.
1083 # For example a java_class_name_value of com.mypackage.MyDoFn
1084 # will be stored with MyDoFn as the short_str_value and
1085 # com.mypackage.MyDoFn as the java_class_name value.
1086 # short_str_value can be displayed and java_class_name_value
1087 # will be displayed as a tooltip.
1088 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001089 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001090 "url": "A String", # An optional full URL.
1091 "floatValue": 3.14, # Contains value if the data is of float type.
1092 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1093 # language namespace (e.g. Python module) which defines the display data.
1094 # This allows a dax monitoring system to specially handle the data
1095 # and perform custom rendering.
1096 "javaClassValue": "A String", # Contains value if the data is of java class type.
1097 "label": "A String", # An optional label to display in a dax UI for the element.
1098 "boolValue": True or False, # Contains value if the data is of a boolean type.
1099 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001100 "key": "A String", # The key identifying the display data.
1101 # This is intended to be used as a label for the display data
1102 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001103 "int64Value": "A String", # Contains value if the data is of int64 type.
1104 },
1105 ],
1106 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1107 { # Description of the composing transforms, names/ids, and input/outputs of a
1108 # stage of execution. Some composing transforms and sources may have been
1109 # generated by the Dataflow service during execution planning.
1110 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1111 { # Description of an interstitial value between transforms in an execution
1112 # stage.
1113 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1114 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1115 # source is most closely associated.
1116 "name": "A String", # Dataflow service generated name for this source.
1117 },
1118 ],
1119 "kind": "A String", # Type of transform this stage is executing.
1120 "name": "A String", # Dataflow service generated name for this stage.
1121 "outputSource": [ # Output sources for this stage.
1122 { # Description of an input or output of an execution stage.
1123 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001124 "sizeBytes": "A String", # Size of the source, if measurable.
1125 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001126 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1127 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001128 },
1129 ],
1130 "inputSource": [ # Input sources for this stage.
1131 { # Description of an input or output of an execution stage.
1132 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001133 "sizeBytes": "A String", # Size of the source, if measurable.
1134 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001135 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1136 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001137 },
1138 ],
1139 "componentTransform": [ # Transforms that comprise this execution stage.
1140 { # Description of a transform executed as part of an execution stage.
1141 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1142 "originalTransform": "A String", # User name for the original user transform with which this transform is
1143 # most closely associated.
1144 "name": "A String", # Dataflow service generated name for this transform.
1145 },
1146 ],
1147 "id": "A String", # Dataflow service generated id for this stage.
1148 },
1149 ],
1150 },
1151 "steps": [ # The top-level steps that constitute the entire job.
1152 { # Defines a particular step within a Cloud Dataflow job.
1153 #
1154 # A job consists of multiple steps, each of which performs some
1155 # specific operation as part of the overall job. Data is typically
1156 # passed from one step to another as part of the job.
1157 #
1158 # Here's an example of a sequence of steps which together implement a
1159 # Map-Reduce job:
1160 #
1161 # * Read a collection of data from some source, parsing the
1162 # collection's elements.
1163 #
1164 # * Validate the elements.
1165 #
1166 # * Apply a user-defined function to map each element to some value
1167 # and extract an element-specific key value.
1168 #
1169 # * Group elements with the same key into a single element with
1170 # that key, transforming a multiply-keyed collection into a
1171 # uniquely-keyed collection.
1172 #
1173 # * Write the elements out to some data sink.
1174 #
1175 # Note that the Cloud Dataflow service may be used to run many different
1176 # types of jobs, not just Map-Reduce.
1177 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001178 "name": "A String", # The name that identifies the step. This must be unique for each
1179 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001180 "properties": { # Named properties associated with the step. Each kind of
1181 # predefined step has its own required set of properties.
1182 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1183 "a_key": "", # Properties of the object.
1184 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001185 },
1186 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001187 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001188 "tempFiles": [ # A set of files the system should be aware of that are used
1189 # for temporary storage. These temporary files will be
1190 # removed on job completion.
1191 # No duplicates are allowed.
1192 # No file patterns are supported.
1193 #
1194 # The supported files are:
1195 #
1196 # Google Cloud Storage:
1197 #
1198 # storage.googleapis.com/{bucket}/{object}
1199 # bucket.storage.googleapis.com/{object}
1200 "A String",
1201 ],
1202 "type": "A String", # The type of Cloud Dataflow job.
1203 "id": "A String", # The unique ID of this job.
1204 #
1205 # This field is set by the Cloud Dataflow service when the Job is
1206 # created, and is immutable for the life of the job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001207 "currentState": "A String", # The current state of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001208 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001209 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1210 # specified.
1211 #
1212 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1213 # terminal state. After a job has reached a terminal state, no
1214 # further state updates may be made.
1215 #
1216 # This field may be mutated by the Cloud Dataflow service;
1217 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001218 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1219 # isn't contained in the submitted job.
1220 "stages": { # A mapping from each stage to the information about that stage.
1221 "a_key": { # Contains information about how a particular
1222 # google.dataflow.v1beta3.Step will be executed.
1223 "stepName": [ # The steps associated with the execution stage.
1224 # Note that stages may have several steps, and that a given step
1225 # might be run by more than one stage.
1226 "A String",
1227 ],
1228 },
1229 },
1230 },
1231 },
1232 }</pre>
1233</div>
1234
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001235</body></html>