blob: 211911cefbfef8418daf5f14777f87d68339c552 [file] [log] [blame]
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5 margin: 0;
6 padding: 0;
7 border: 0;
8 font-weight: inherit;
9 font-style: inherit;
10 font-size: 100%;
11 font-family: inherit;
12 vertical-align: baseline;
13}
14
15body {
16 font-size: 13px;
17 padding: 1em;
18}
19
20h1 {
21 font-size: 26px;
22 margin-bottom: 1em;
23}
24
25h2 {
26 font-size: 24px;
27 margin-bottom: 1em;
28}
29
30h3 {
31 font-size: 20px;
32 margin-bottom: 1em;
33 margin-top: 1em;
34}
35
36pre, code {
37 line-height: 1.5;
38 font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42 margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46 font-family: Arial, sans-serif;
47}
48
49h1, h2, h3 {
50 border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54 margin-top: 0.5em;
55}
56
57.firstline {
58 margin-left: 2em;
59}
60
61.method {
62 margin-top: 1em;
63 border: solid 1px #CCC;
64 padding: 1em;
65 background: #EEE;
66}
67
68.details {
69 font-weight: bold;
70 font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78 <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
79</p>
80<p class="firstline">Returns the messages Resource.</p>
81
82<p class="toc_element">
83 <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
84</p>
85<p class="firstline">Returns the workItems Resource.</p>
86
87<p class="toc_element">
88 <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040089<p class="firstline">Creates a Cloud Dataflow job.</p>
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080090<p class="toc_element">
91 <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040092<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080093<p class="toc_element">
94 <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
95<p class="firstline">Request the job status.</p>
96<p class="toc_element">
97 <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040098<p class="firstline">List the jobs of a project.</p>
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080099<p class="toc_element">
100 <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
101<p class="firstline">Retrieves the next page of results.</p>
102<p class="toc_element">
103 <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400104<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800105<h3>Method Details</h3>
106<div class="method">
107 <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400108 <pre>Creates a Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800109
110Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400111 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
112 location: string, The location that contains this job. (required)
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800113 body: object, The request body. (required)
114 The object takes the form of:
115
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400116{ # Defines a job to be run by the Cloud Dataflow service.
117 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
118 # If this field is set, the service will ensure its uniqueness.
119 # The request to create a job will fail if the service has knowledge of a
120 # previously submitted job with the same client's ID and job name.
121 # The caller may use this field to ensure idempotence of job
122 # creation across retried attempts to create a job.
123 # By default, the field is empty and, in that case, the service ignores it.
124 "requestedState": "A String", # The job's requested state.
125 #
126 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
127 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
128 # also be used to directly set a job's requested state to
129 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
130 # job if it has not already reached a terminal state.
131 "name": "A String", # The user-specified Cloud Dataflow job name.
132 #
133 # Only one Job with a given name may exist in a project at any
134 # given time. If a caller attempts to create a Job with the same
135 # name as an already-existing Job, the attempt returns the
136 # existing Job.
137 #
138 # The name must match the regular expression
139 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
140 "currentStateTime": "A String", # The timestamp associated with the current state.
141 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
142 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
143 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
144 "labels": { # User-defined labels for this job.
145 #
146 # The labels map can contain no more than 64 entries. Entries of the labels
147 # map are UTF8 strings that comply with the following restrictions:
148 #
149 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
150 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
151 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
152 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800153 "a_key": "A String",
154 },
Thomas Coffee2f245372017-03-27 10:39:26 -0700155 "location": "A String", # The location that contains this job.
156 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
157 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400158 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
159 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800160 "a_key": "A String",
161 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400162 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
163 "version": { # A structure describing which components and their versions of the service
164 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800165 "a_key": "", # Properties of the object.
166 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400167 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
168 # storage. The system will append the suffix "/temp-{JOBNAME}" to
169 # this resource prefix, where {JOBNAME} is the value of the
170 # job_name field. The resulting bucket and object prefix is used
171 # as the prefix of the resources used to store temporary data
172 # needed during the job execution. NOTE: This will override the
173 # value in taskrunner_settings.
174 # The supported resource type is:
175 #
176 # Google Cloud Storage:
177 #
178 # storage.googleapis.com/{bucket}/{object}
179 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800180 "internalExperiments": { # Experimental settings.
181 "a_key": "", # Properties of the object. Contains field @type with type URL.
182 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400183 "dataset": "A String", # The dataset for the current project where various workflow
184 # related tables are stored.
185 #
186 # The supported resource type is:
187 #
188 # Google BigQuery:
189 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800190 "experiments": [ # The list of experiments to enable.
191 "A String",
192 ],
193 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400194 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
195 # options are passed through the service and are used to recreate the
196 # SDK pipeline options on the worker in a language agnostic and platform
197 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800198 "a_key": "", # Properties of the object.
199 },
200 "userAgent": { # A description of the process that generated the request.
201 "a_key": "", # Properties of the object.
202 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400203 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
204 # unspecified, the service will attempt to choose a reasonable
205 # default. This should be in the form of the API service name,
206 # e.g. "compute.googleapis.com".
207 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
208 # specified in order for the job to have workers.
209 { # Describes one particular pool of Cloud Dataflow workers to be
210 # instantiated by the Cloud Dataflow service in order to perform the
211 # computations required by a job. Note that a workflow job may use
212 # multiple pools, in order to match the various computational
213 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800214 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -0700215 "ipConfiguration": "A String", # Configuration for VM IPs.
216 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
217 # are supported.
218 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
219 # service will attempt to choose a reasonable default.
220 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
221 # the service will use the network "default".
222 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
223 # will attempt to choose a reasonable default.
224 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
225 # attempt to choose a reasonable default.
226 "metadata": { # Metadata to set on the Google Compute Engine VMs.
227 "a_key": "A String",
228 },
229 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
230 # Compute Engine API.
231 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
232 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
233 # `TEARDOWN_NEVER`.
234 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
235 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
236 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
237 # down.
238 #
239 # If the workers are not torn down by the service, they will
240 # continue to run and use Google Compute Engine VM resources in the
241 # user's project until they are explicitly terminated by the user.
242 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
243 # policy except for small, manually supervised test jobs.
244 #
245 # If unknown or unspecified, the service will attempt to choose a reasonable
246 # default.
247 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
248 # service will choose a number of threads (according to the number of cores
249 # on the selected machine type for batch, or 1 by convention for streaming).
250 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
251 # the form "regions/REGION/subnetworks/SUBNETWORK".
252 "poolArgs": { # Extra arguments for this worker pool.
253 "a_key": "", # Properties of the object. Contains field @type with type URL.
254 },
255 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
256 # execute the job. If zero or unspecified, the service will
257 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400258 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
259 # using the standard Dataflow task runner. Users should ignore
260 # this field.
261 "workflowFileName": "A String", # The file to store the workflow in.
262 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
263 # will not be uploaded.
264 #
265 # The supported resource type is:
266 #
267 # Google Cloud Storage:
268 # storage.googleapis.com/{bucket}/{object}
269 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400270 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
271 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700272 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
273 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
274 "vmId": "A String", # The ID string of the VM.
275 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
276 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400277 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
278 # access the Cloud Dataflow API.
279 "A String",
280 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400281 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
282 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
283 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
284 # "shuffle/v1beta1".
285 "workerId": "A String", # The ID of the worker running this pipeline.
286 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
287 #
288 # When workers access Google Cloud APIs, they logically do so via
289 # relative URLs. If this field is specified, it supplies the base
290 # URL to use for resolving these relative URLs. The normative
291 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
292 # Locators".
293 #
294 # If not specified, the default value is "http://www.googleapis.com/"
295 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
296 # "dataflow/v1b3/projects".
297 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
298 # storage.
299 #
300 # The supported resource type is:
301 #
302 # Google Cloud Storage:
303 #
304 # storage.googleapis.com/{bucket}/{object}
305 # bucket.storage.googleapis.com/{object}
306 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700307 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
308 # taskrunner; e.g. "wheel".
309 "languageHint": "A String", # The suggested backend language.
310 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
311 # console.
312 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
313 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400314 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400315 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
316 #
317 # When workers access Google Cloud APIs, they logically do so via
318 # relative URLs. If this field is specified, it supplies the base
319 # URL to use for resolving these relative URLs. The normative
320 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
321 # Locators".
322 #
323 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700324 "harnessCommand": "A String", # The command to launch the worker harness.
325 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
326 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400327 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700328 # The supported resource type is:
329 #
330 # Google Cloud Storage:
331 # storage.googleapis.com/{bucket}/{object}
332 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800333 },
Thomas Coffee2f245372017-03-27 10:39:26 -0700334 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
335 # select a default set of packages which are useful to worker
336 # harnesses written in a particular language.
337 "packages": [ # Packages to be installed on workers.
338 { # The packages that must be installed in order for a worker to run the
339 # steps of the Cloud Dataflow job that will be assigned to its worker
340 # pool.
341 #
342 # This is the mechanism by which the Cloud Dataflow SDK causes code to
343 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
344 # might use this to install jars containing the user's code and all of the
345 # various dependencies (libraries, data files, etc.) required in order
346 # for that code to run.
347 "name": "A String", # The name of the package.
348 "location": "A String", # The resource to read the package from. The supported resource type is:
349 #
350 # Google Cloud Storage:
351 #
352 # storage.googleapis.com/{bucket}
353 # bucket.storage.googleapis.com/
354 },
355 ],
356 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
357 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
358 "algorithm": "A String", # The algorithm to use for autoscaling.
359 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800360 "dataDisks": [ # Data disks that are used by a VM in this workflow.
361 { # Describes the data disk used by a workflow job.
362 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400363 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
364 # attempt to choose a reasonable default.
365 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
366 # must be a disk type appropriate to the project and zone in which
367 # the workers will run. If unknown or unspecified, the service
368 # will attempt to choose a reasonable default.
369 #
370 # For example, the standard persistent disk type is a resource name
371 # typically ending in "pd-standard". If SSD persistent disks are
372 # available, the resource name typically ends with "pd-ssd". The
373 # actual valid values are defined by the Google Compute Engine API,
374 # not by the Cloud Dataflow API; consult the Google Compute Engine
375 # documentation for more information about determining the set of
376 # available disk types for a particular project and zone.
377 #
378 # Google Compute Engine Disk types are local to a particular
379 # project in a particular zone, and so the resource name will
380 # typically look something like this:
381 #
382 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800383 },
384 ],
Thomas Coffee2f245372017-03-27 10:39:26 -0700385 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400386 # attempt to choose a reasonable default.
387 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
388 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800389 },
390 ],
391 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400392 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
393 # A description of the user pipeline and stages through which it is executed.
394 # Created by Cloud Dataflow service. Only retrieved with
395 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
396 # form. This data is provided by the Dataflow service for ease of visualizing
397 # the pipeline and interpreting Dataflow provided metrics.
398 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
399 { # Description of the type, names/ids, and input/outputs for a transform.
400 "kind": "A String", # Type of transform.
401 "name": "A String", # User provided name for this transform instance.
402 "inputCollectionName": [ # User names for all collection inputs to this transform.
403 "A String",
404 ],
405 "displayData": [ # Transform-specific display data.
406 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -0700407 "key": "A String", # The key identifying the display data.
408 # This is intended to be used as a label for the display data
409 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400410 "shortStrValue": "A String", # A possible additional shorter value to display.
411 # For example a java_class_name_value of com.mypackage.MyDoFn
412 # will be stored with MyDoFn as the short_str_value and
413 # com.mypackage.MyDoFn as the java_class_name value.
414 # short_str_value can be displayed and java_class_name_value
415 # will be displayed as a tooltip.
416 "timestampValue": "A String", # Contains value if the data is of timestamp type.
417 "url": "A String", # An optional full URL.
418 "floatValue": 3.14, # Contains value if the data is of float type.
419 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
420 # language namespace (e.g. a Python module) which defines the display data.
421 # This allows a dax monitoring system to specially handle the data
422 # and perform custom rendering.
423 "javaClassValue": "A String", # Contains value if the data is of java class type.
424 "label": "A String", # An optional label to display in a dax UI for the element.
425 "boolValue": True or False, # Contains value if the data is of a boolean type.
426 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -0700427 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400428 "int64Value": "A String", # Contains value if the data is of int64 type.
429 },
430 ],
431 "outputCollectionName": [ # User names for all collection outputs to this transform.
432 "A String",
433 ],
434 "id": "A String", # SDK generated id of this transform instance.
435 },
436 ],
437 "displayData": [ # Pipeline level display data.
438 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -0700439 "key": "A String", # The key identifying the display data.
440 # This is intended to be used as a label for the display data
441 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400442 "shortStrValue": "A String", # A possible additional shorter value to display.
443 # For example a java_class_name_value of com.mypackage.MyDoFn
444 # will be stored with MyDoFn as the short_str_value and
445 # com.mypackage.MyDoFn as the java_class_name value.
446 # short_str_value can be displayed and java_class_name_value
447 # will be displayed as a tooltip.
448 "timestampValue": "A String", # Contains value if the data is of timestamp type.
449 "url": "A String", # An optional full URL.
450 "floatValue": 3.14, # Contains value if the data is of float type.
451 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
452 # language namespace (e.g. a Python module) which defines the display data.
453 # This allows a dax monitoring system to specially handle the data
454 # and perform custom rendering.
455 "javaClassValue": "A String", # Contains value if the data is of java class type.
456 "label": "A String", # An optional label to display in a dax UI for the element.
457 "boolValue": True or False, # Contains value if the data is of a boolean type.
458 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -0700459 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400460 "int64Value": "A String", # Contains value if the data is of int64 type.
461 },
462 ],
463 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
464 { # Description of the composing transforms, names/ids, and input/outputs of a
465 # stage of execution. Some composing transforms and sources may have been
466 # generated by the Dataflow service during execution planning.
467 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
468 { # Description of an interstitial value between transforms in an execution
469 # stage.
470 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
471 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
472 # source is most closely associated.
473 "name": "A String", # Dataflow service generated name for this source.
474 },
475 ],
476 "kind": "A String", # Type of transform this stage is executing.
477 "name": "A String", # Dataflow service generated name for this stage.
478 "outputSource": [ # Output sources for this stage.
479 { # Description of an input or output of an execution stage.
480 "userName": "A String", # Human-readable name for this source; may be user or system generated.
481 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
482 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -0700483 "name": "A String", # Dataflow service generated name for this source.
484 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400485 },
486 ],
487 "inputSource": [ # Input sources for this stage.
488 { # Description of an input or output of an execution stage.
489 "userName": "A String", # Human-readable name for this source; may be user or system generated.
490 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
491 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -0700492 "name": "A String", # Dataflow service generated name for this source.
493 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400494 },
495 ],
496 "componentTransform": [ # Transforms that comprise this execution stage.
497 { # Description of a transform executed as part of an execution stage.
498 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
499 "originalTransform": "A String", # User name for the original user transform with which this transform is
500 # most closely associated.
501 "name": "A String", # Dataflow service generated name for this source.
502 },
503 ],
504 "id": "A String", # Dataflow service generated id for this stage.
505 },
506 ],
507 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800508 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400509 { # Defines a particular step within a Cloud Dataflow job.
510 #
511 # A job consists of multiple steps, each of which performs some
512 # specific operation as part of the overall job. Data is typically
513 # passed from one step to another as part of the job.
514 #
515 # Here's an example of a sequence of steps which together implement a
516 # Map-Reduce job:
517 #
518 # * Read a collection of data from some source, parsing the
519 # collection's elements.
520 #
521 # * Validate the elements.
522 #
523 # * Apply a user-defined function to map each element to some value
524 # and extract an element-specific key value.
525 #
526 # * Group elements with the same key into a single element with
527 # that key, transforming a multiply-keyed collection into a
528 # uniquely-keyed collection.
529 #
530 # * Write the elements out to some data sink.
531 #
532 # Note that the Cloud Dataflow service may be used to run many different
533 # types of jobs, not just Map-Reduce.
534 "kind": "A String", # The kind of step in the Cloud Dataflow job.
535 "properties": { # Named properties associated with the step. Each kind of
536 # predefined step has its own required set of properties.
537 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800538 "a_key": "", # Properties of the object.
539 },
Thomas Coffee2f245372017-03-27 10:39:26 -0700540 "name": "A String", # The name that identifies the step. This must be unique for each
541 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800542 },
543 ],
Thomas Coffee2f245372017-03-27 10:39:26 -0700544 "currentState": "A String", # The current state of the job.
545 #
546 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
547 # specified.
548 #
549 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
550 # terminal state. After a job has reached a terminal state, no
551 # further state updates may be made.
552 #
553 # This field may be mutated by the Cloud Dataflow service;
554 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400555 "tempFiles": [ # A set of files the system should be aware of that are used
556 # for temporary storage. These temporary files will be
557 # removed on job completion.
558 # No duplicates are allowed.
559 # No file patterns are supported.
560 #
561 # The supported files are:
562 #
563 # Google Cloud Storage:
564 #
565 # storage.googleapis.com/{bucket}/{object}
566 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800567 "A String",
568 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400569 "type": "A String", # The type of Cloud Dataflow job.
570 "id": "A String", # The unique ID of this job.
571 #
572 # This field is set by the Cloud Dataflow service when the Job is
573 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -0700574 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
575 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400576 #
Thomas Coffee2f245372017-03-27 10:39:26 -0700577 # When sending a `CreateJobRequest`, you can update a job by specifying it
578 # here. The job named here is stopped, and its intermediate state is
579 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400580 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
581 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800582 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400583 "a_key": { # Contains information about how a particular
584 # google.dataflow.v1beta3.Step will be executed.
585 "stepName": [ # The steps associated with the execution stage.
586 # Note that stages may have several steps, and that a given step
587 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800588 "A String",
589 ],
590 },
591 },
592 },
593 }
594
595 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400596 Allowed values
597 1 - v1 error format
598 2 - v2 error format
599 replaceJobId: string, Deprecated. This field is now in the Job message.
600 view: string, The level of information requested in response.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800601
602Returns:
603 An object of the form:
604
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400605 { # Defines a job to be run by the Cloud Dataflow service.
606 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
607 # If this field is set, the service will ensure its uniqueness.
608 # The request to create a job will fail if the service has knowledge of a
609 # previously submitted job with the same client's ID and job name.
610 # The caller may use this field to ensure idempotence of job
611 # creation across retried attempts to create a job.
612 # By default, the field is empty and, in that case, the service ignores it.
613 "requestedState": "A String", # The job's requested state.
614 #
615 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
616 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
617 # also be used to directly set a job's requested state to
618 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
619 # job if it has not already reached a terminal state.
620 "name": "A String", # The user-specified Cloud Dataflow job name.
621 #
622 # Only one Job with a given name may exist in a project at any
623 # given time. If a caller attempts to create a Job with the same
624 # name as an already-existing Job, the attempt returns the
625 # existing Job.
626 #
627 # The name must match the regular expression
628 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
629 "currentStateTime": "A String", # The timestamp associated with the current state.
630 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
631 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
632 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
633 "labels": { # User-defined labels for this job.
634 #
635 # The labels map can contain no more than 64 entries. Entries of the labels
636 # map are UTF8 strings that comply with the following restrictions:
637 #
638 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
639 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
640 # * Both keys and values are additionally constrained to be <= 128 bytes in
641 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800642 "a_key": "A String",
643 },
Thomas Coffee2f245372017-03-27 10:39:26 -0700644 "location": "A String", # The location that contains this job.
645 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
646 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400647 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
648 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800649 "a_key": "A String",
650 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400651 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
652 "version": { # A structure describing which components and their versions of the service
653 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800654 "a_key": "", # Properties of the object.
655 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400656 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
657 # storage. The system will append the suffix "/temp-{JOBNAME}" to
658 # this resource prefix, where {JOBNAME} is the value of the
659 # job_name field. The resulting bucket and object prefix is used
660 # as the prefix of the resources used to store temporary data
661 # needed during the job execution. NOTE: This will override the
662 # value in taskrunner_settings.
663 # The supported resource type is:
664 #
665 # Google Cloud Storage:
666 #
667 # storage.googleapis.com/{bucket}/{object}
668 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800669 "internalExperiments": { # Experimental settings.
670 "a_key": "", # Properties of the object. Contains field @type with type URL.
671 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400672 "dataset": "A String", # The dataset for the current project where various workflow
673 # related tables are stored.
674 #
675 # The supported resource type is:
676 #
677 # Google BigQuery:
678 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800679 "experiments": [ # The list of experiments to enable.
680 "A String",
681 ],
682 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400683 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
684 # options are passed through the service and are used to recreate the
685 # SDK pipeline options on the worker in a language agnostic and platform
686 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800687 "a_key": "", # Properties of the object.
688 },
689 "userAgent": { # A description of the process that generated the request.
690 "a_key": "", # Properties of the object.
691 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400692 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
693 # unspecified, the service will attempt to choose a reasonable
694 # default. This should be in the form of the API service name,
695 # e.g. "compute.googleapis.com".
696 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
697 # specified in order for the job to have workers.
698 { # Describes one particular pool of Cloud Dataflow workers to be
699 # instantiated by the Cloud Dataflow service in order to perform the
700 # computations required by a job. Note that a workflow job may use
701 # multiple pools, in order to match the various computational
702 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800703 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -0700704 "ipConfiguration": "A String", # Configuration for VM IPs.
705 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
706 # are supported.
707 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
708 # service will attempt to choose a reasonable default.
709 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
710 # the service will use the network "default".
711 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
712 # will attempt to choose a reasonable default.
713 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
714 # attempt to choose a reasonable default.
715 "metadata": { # Metadata to set on the Google Compute Engine VMs.
716 "a_key": "A String",
717 },
718 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
719 # Compute Engine API.
720 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
721 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
722 # `TEARDOWN_NEVER`.
723 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
724 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
725 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
726 # down.
727 #
728 # If the workers are not torn down by the service, they will
729 # continue to run and use Google Compute Engine VM resources in the
730 # user's project until they are explicitly terminated by the user.
731 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
732 # policy except for small, manually supervised test jobs.
733 #
734 # If unknown or unspecified, the service will attempt to choose a reasonable
735 # default.
736 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
737 # service will choose a number of threads (according to the number of cores
738 # on the selected machine type for batch, or 1 by convention for streaming).
739 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
740 # the form "regions/REGION/subnetworks/SUBNETWORK".
741 "poolArgs": { # Extra arguments for this worker pool.
742 "a_key": "", # Properties of the object. Contains field @type with type URL.
743 },
744 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
745 # execute the job. If zero or unspecified, the service will
746 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400747 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
748 # using the standard Dataflow task runner. Users should ignore
749 # this field.
750 "workflowFileName": "A String", # The file to store the workflow in.
751 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
752 # will not be uploaded.
753 #
754 # The supported resource type is:
755 #
756 # Google Cloud Storage:
757 # storage.googleapis.com/{bucket}/{object}
758 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400759 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
760 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700761 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
762 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
763 "vmId": "A String", # The ID string of the VM.
764 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
765 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400766 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
767 # access the Cloud Dataflow API.
768 "A String",
769 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400770 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
771 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
772 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
773 # "shuffle/v1beta1".
774 "workerId": "A String", # The ID of the worker running this pipeline.
775 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
776 #
777 # When workers access Google Cloud APIs, they logically do so via
778 # relative URLs. If this field is specified, it supplies the base
779 # URL to use for resolving these relative URLs. The normative
780 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
781 # Locators".
782 #
783 # If not specified, the default value is "http://www.googleapis.com/"
784 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
785 # "dataflow/v1b3/projects".
786 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
787 # storage.
788 #
789 # The supported resource type is:
790 #
791 # Google Cloud Storage:
792 #
793 # storage.googleapis.com/{bucket}/{object}
794 # bucket.storage.googleapis.com/{object}
795 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700796 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
797 # taskrunner; e.g. "wheel".
798 "languageHint": "A String", # The suggested backend language.
799 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
800 # console.
801 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
802 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400803 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400804 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
805 #
806 # When workers access Google Cloud APIs, they logically do so via
807 # relative URLs. If this field is specified, it supplies the base
808 # URL to use for resolving these relative URLs. The normative
809 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
810 # Locators".
811 #
812 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700813 "harnessCommand": "A String", # The command to launch the worker harness.
814 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
815 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400816 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700817 # The supported resource type is:
818 #
819 # Google Cloud Storage:
820 # storage.googleapis.com/{bucket}/{object}
821 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800822 },
Thomas Coffee2f245372017-03-27 10:39:26 -0700823 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
824 # select a default set of packages which are useful to worker
825 # harnesses written in a particular language.
826 "packages": [ # Packages to be installed on workers.
827 { # The packages that must be installed in order for a worker to run the
828 # steps of the Cloud Dataflow job that will be assigned to its worker
829 # pool.
830 #
831 # This is the mechanism by which the Cloud Dataflow SDK causes code to
832 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
833 # might use this to install jars containing the user's code and all of the
834 # various dependencies (libraries, data files, etc.) required in order
835 # for that code to run.
836 "name": "A String", # The name of the package.
837 "location": "A String", # The resource to read the package from. The supported resource type is:
838 #
839 # Google Cloud Storage:
840 #
841 # storage.googleapis.com/{bucket}
842 # bucket.storage.googleapis.com/
843 },
844 ],
845 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
846 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
847 "algorithm": "A String", # The algorithm to use for autoscaling.
848 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800849 "dataDisks": [ # Data disks that are used by a VM in this workflow.
850 { # Describes the data disk used by a workflow job.
851 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400852 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
853 # attempt to choose a reasonable default.
854 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
855 # must be a disk type appropriate to the project and zone in which
856 # the workers will run. If unknown or unspecified, the service
857 # will attempt to choose a reasonable default.
858 #
859 # For example, the standard persistent disk type is a resource name
860 # typically ending in "pd-standard". If SSD persistent disks are
861 # available, the resource name typically ends with "pd-ssd". The
862 # actual valid values are defined by the Google Compute Engine API,
863 # not by the Cloud Dataflow API; consult the Google Compute Engine
864 # documentation for more information about determining the set of
865 # available disk types for a particular project and zone.
866 #
867 # Google Compute Engine Disk types are local to a particular
868 # project in a particular zone, and so the resource name will
869 # typically look something like this:
870 #
871 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800872 },
873 ],
Thomas Coffee2f245372017-03-27 10:39:26 -0700874 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400875 # attempt to choose a reasonable default.
876 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
877 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800878 },
879 ],
880 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400881 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
882 # A description of the user pipeline and stages through which it is executed.
883 # Created by Cloud Dataflow service. Only retrieved with
884 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
885 # form. This data is provided by the Dataflow service for ease of visualizing
886 # the pipeline and interpreting Dataflow provided metrics.
887 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
888 { # Description of the type, names/ids, and input/outputs for a transform.
889 "kind": "A String", # Type of transform.
890 "name": "A String", # User provided name for this transform instance.
891 "inputCollectionName": [ # User names for all collection inputs to this transform.
892 "A String",
893 ],
894 "displayData": [ # Transform-specific display data.
895 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -0700896 "key": "A String", # The key identifying the display data.
897 # This is intended to be used as a label for the display data
898 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400899 "shortStrValue": "A String", # A possible additional shorter value to display.
900 # For example a java_class_name_value of com.mypackage.MyDoFn
901 # will be stored with MyDoFn as the short_str_value and
902 # com.mypackage.MyDoFn as the java_class_name value.
903 # short_str_value can be displayed and java_class_name_value
904 # will be displayed as a tooltip.
905 "timestampValue": "A String", # Contains value if the data is of timestamp type.
906 "url": "A String", # An optional full URL.
907 "floatValue": 3.14, # Contains value if the data is of float type.
908 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
909 # language namespace (e.g. python module) which defines the display data.
910 # This allows a dax monitoring system to specially handle the data
911 # and perform custom rendering.
912 "javaClassValue": "A String", # Contains value if the data is of java class type.
913 "label": "A String", # An optional label to display in a dax UI for the element.
914 "boolValue": True or False, # Contains value if the data is of a boolean type.
915 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -0700916 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400917 "int64Value": "A String", # Contains value if the data is of int64 type.
918 },
919 ],
920 "outputCollectionName": [ # User names for all collection outputs to this transform.
921 "A String",
922 ],
923 "id": "A String", # SDK generated id of this transform instance.
924 },
925 ],
926 "displayData": [ # Pipeline level display data.
927 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -0700928 "key": "A String", # The key identifying the display data.
929 # This is intended to be used as a label for the display data
930 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400931 "shortStrValue": "A String", # A possible additional shorter value to display.
932 # For example a java_class_name_value of com.mypackage.MyDoFn
933 # will be stored with MyDoFn as the short_str_value and
934 # com.mypackage.MyDoFn as the java_class_name value.
935 # short_str_value can be displayed and java_class_name_value
936 # will be displayed as a tooltip.
937 "timestampValue": "A String", # Contains value if the data is of timestamp type.
938 "url": "A String", # An optional full URL.
939 "floatValue": 3.14, # Contains value if the data is of float type.
940 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
941 # language namespace (e.g. python module) which defines the display data.
942 # This allows a dax monitoring system to specially handle the data
943 # and perform custom rendering.
944 "javaClassValue": "A String", # Contains value if the data is of java class type.
945 "label": "A String", # An optional label to display in a dax UI for the element.
946 "boolValue": True or False, # Contains value if the data is of a boolean type.
947 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -0700948 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400949 "int64Value": "A String", # Contains value if the data is of int64 type.
950 },
951 ],
952 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
953 { # Description of the composing transforms, names/ids, and input/outputs of a
954 # stage of execution. Some composing transforms and sources may have been
955 # generated by the Dataflow service during execution planning.
956 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
957 { # Description of an interstitial value between transforms in an execution
958 # stage.
959 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
960 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
961 # source is most closely associated.
962 "name": "A String", # Dataflow service generated name for this source.
963 },
964 ],
965 "kind": "A String", # Type of transform this stage is executing.
966 "name": "A String", # Dataflow service generated name for this stage.
967 "outputSource": [ # Output sources for this stage.
968 { # Description of an input or output of an execution stage.
969 "userName": "A String", # Human-readable name for this source; may be user or system generated.
970 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
971 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -0700972 "name": "A String", # Dataflow service generated name for this source.
973 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400974 },
975 ],
976 "inputSource": [ # Input sources for this stage.
977 { # Description of an input or output of an execution stage.
978 "userName": "A String", # Human-readable name for this source; may be user or system generated.
979 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
980 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -0700981 "name": "A String", # Dataflow service generated name for this source.
982 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400983 },
984 ],
985 "componentTransform": [ # Transforms that comprise this execution stage.
986 { # Description of a transform executed as part of an execution stage.
987 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
988 "originalTransform": "A String", # User name for the original user transform with which this transform is
989 # most closely associated.
990 "name": "A String", # Dataflow service generated name for this source.
991 },
992 ],
993 "id": "A String", # Dataflow service generated id for this stage.
994 },
995 ],
996 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800997 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400998 { # Defines a particular step within a Cloud Dataflow job.
999 #
1000 # A job consists of multiple steps, each of which performs some
1001 # specific operation as part of the overall job. Data is typically
1002 # passed from one step to another as part of the job.
1003 #
1004 # Here's an example of a sequence of steps which together implement a
1005 # Map-Reduce job:
1006 #
1007 # * Read a collection of data from some source, parsing the
1008 # collection's elements.
1009 #
1010 # * Validate the elements.
1011 #
1012 # * Apply a user-defined function to map each element to some value
1013 # and extract an element-specific key value.
1014 #
1015 # * Group elements with the same key into a single element with
1016 # that key, transforming a multiply-keyed collection into a
1017 # uniquely-keyed collection.
1018 #
1019 # * Write the elements out to some data sink.
1020 #
1021 # Note that the Cloud Dataflow service may be used to run many different
1022 # types of jobs, not just Map-Reduce.
1023 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1024 "properties": { # Named properties associated with the step. Each kind of
1025 # predefined step has its own required set of properties.
1026 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001027 "a_key": "", # Properties of the object.
1028 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001029 "name": "A String", # The name that identifies the step. This must be unique for each
1030 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001031 },
1032 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07001033 "currentState": "A String", # The current state of the job.
1034 #
1035 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1036 # specified.
1037 #
1038 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1039 # terminal state. After a job has reached a terminal state, no
1040 # further state updates may be made.
1041 #
1042 # This field may be mutated by the Cloud Dataflow service;
1043 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001044 "tempFiles": [ # A set of files the system should be aware of that are used
1045 # for temporary storage. These temporary files will be
1046 # removed on job completion.
1047 # No duplicates are allowed.
1048 # No file patterns are supported.
1049 #
1050 # The supported files are:
1051 #
1052 # Google Cloud Storage:
1053 #
1054 # storage.googleapis.com/{bucket}/{object}
1055 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001056 "A String",
1057 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001058 "type": "A String", # The type of Cloud Dataflow job.
1059 "id": "A String", # The unique ID of this job.
1060 #
1061 # This field is set by the Cloud Dataflow service when the Job is
1062 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -07001063 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1064 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001065 #
Thomas Coffee2f245372017-03-27 10:39:26 -07001066 # When sending a `CreateJobRequest`, you can update a job by specifying it
1067 # here. The job named here is stopped, and its intermediate state is
1068 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001069 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1070 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001071 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001072 "a_key": { # Contains information about how a particular
1073 # google.dataflow.v1beta3.Step will be executed.
1074 "stepName": [ # The steps associated with the execution stage.
1075 # Note that stages may have several steps, and that a given step
1076 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001077 "A String",
1078 ],
1079 },
1080 },
1081 },
1082 }</pre>
1083</div>
1084
1085<div class="method">
1086 <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001087 <pre>Gets the state of the specified Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001088
1089Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001090 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1091 location: string, The location that contains this job. (required)
1092 jobId: string, The job ID. (required)
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001093 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001094 Allowed values
1095 1 - v1 error format
1096 2 - v2 error format
1097 view: string, The level of information requested in response.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001098
1099Returns:
1100 An object of the form:
1101
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001102 { # Defines a job to be run by the Cloud Dataflow service.
1103 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1104 # If this field is set, the service will ensure its uniqueness.
1105 # The request to create a job will fail if the service has knowledge of a
1106 # previously submitted job with the same client's ID and job name.
1107 # The caller may use this field to ensure idempotence of job
1108 # creation across retried attempts to create a job.
1109 # By default, the field is empty and, in that case, the service ignores it.
1110 "requestedState": "A String", # The job's requested state.
1111 #
1112 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1113 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1114 # also be used to directly set a job's requested state to
1115 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1116 # job if it has not already reached a terminal state.
1117 "name": "A String", # The user-specified Cloud Dataflow job name.
1118 #
1119 # Only one Job with a given name may exist in a project at any
1120 # given time. If a caller attempts to create a Job with the same
1121 # name as an already-existing Job, the attempt returns the
1122 # existing Job.
1123 #
1124 # The name must match the regular expression
1125 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1126 "currentStateTime": "A String", # The timestamp associated with the current state.
1127 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1128 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1129 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1130 "labels": { # User-defined labels for this job.
1131 #
1132 # The labels map can contain no more than 64 entries. Entries of the labels
1133 # map are UTF8 strings that comply with the following restrictions:
1134 #
1135 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1136 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1137 # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
1138 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001139 "a_key": "A String",
1140 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001141 "location": "A String", # The location that contains this job.
1142 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1143 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001144 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1145 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001146 "a_key": "A String",
1147 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001148 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1149 "version": { # A structure describing which components and their versions of the service
1150 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001151 "a_key": "", # Properties of the object.
1152 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001153 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1154 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1155 # this resource prefix, where {JOBNAME} is the value of the
1156 # job_name field. The resulting bucket and object prefix is used
1157 # as the prefix of the resources used to store temporary data
1158 # needed during the job execution. NOTE: This will override the
1159 # value in taskrunner_settings.
1160 # The supported resource type is:
1161 #
1162 # Google Cloud Storage:
1163 #
1164 # storage.googleapis.com/{bucket}/{object}
1165 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001166 "internalExperiments": { # Experimental settings.
1167 "a_key": "", # Properties of the object. Contains field @type with type URL.
1168 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001169 "dataset": "A String", # The dataset for the current project where various workflow
1170 # related tables are stored.
1171 #
1172 # The supported resource type is:
1173 #
1174 # Google BigQuery:
1175 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001176 "experiments": [ # The list of experiments to enable.
1177 "A String",
1178 ],
1179 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001180 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1181 # options are passed through the service and are used to recreate the
1182 # SDK pipeline options on the worker in a language agnostic and platform
1183 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001184 "a_key": "", # Properties of the object.
1185 },
1186 "userAgent": { # A description of the process that generated the request.
1187 "a_key": "", # Properties of the object.
1188 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001189 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1190 # unspecified, the service will attempt to choose a reasonable
1191 # default. This should be in the form of the API service name,
1192 # e.g. "compute.googleapis.com".
1193 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1194 # specified in order for the job to have workers.
1195 { # Describes one particular pool of Cloud Dataflow workers to be
1196 # instantiated by the Cloud Dataflow service in order to perform the
1197 # computations required by a job. Note that a workflow job may use
1198 # multiple pools, in order to match the various computational
1199 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001200 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -07001201 "ipConfiguration": "A String", # Configuration for VM IPs.
1202 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1203 # are supported.
1204 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1205 # service will attempt to choose a reasonable default.
1206 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1207 # the service will use the network "default".
1208 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1209 # will attempt to choose a reasonable default.
1210 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1211 # attempt to choose a reasonable default.
1212 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1213 "a_key": "A String",
1214 },
1215 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1216 # Compute Engine API.
1217 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1218 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1219 # `TEARDOWN_NEVER`.
1220 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1221 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1222 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1223 # down.
1224 #
1225 # If the workers are not torn down by the service, they will
1226 # continue to run and use Google Compute Engine VM resources in the
1227 # user's project until they are explicitly terminated by the user.
1228 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1229 # policy except for small, manually supervised test jobs.
1230 #
1231 # If unknown or unspecified, the service will attempt to choose a reasonable
1232 # default.
1233 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1234 # service will choose a number of threads (according to the number of cores
1235 # on the selected machine type for batch, or 1 by convention for streaming).
1236 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1237 # the form "regions/REGION/subnetworks/SUBNETWORK".
1238 "poolArgs": { # Extra arguments for this worker pool.
1239 "a_key": "", # Properties of the object. Contains field @type with type URL.
1240 },
1241 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1242 # execute the job. If zero or unspecified, the service will
1243 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001244 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1245 # using the standard Dataflow task runner. Users should ignore
1246 # this field.
1247 "workflowFileName": "A String", # The file to store the workflow in.
1248 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1249 # will not be uploaded.
1250 #
1251 # The supported resource type is:
1252 #
1253 # Google Cloud Storage:
1254 # storage.googleapis.com/{bucket}/{object}
1255 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001256 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1257 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001258 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1259 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1260 "vmId": "A String", # The ID string of the VM.
1261 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1262 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001263 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1264 # access the Cloud Dataflow API.
1265 "A String",
1266 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001267 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1268 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1269 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1270 # "shuffle/v1beta1".
1271 "workerId": "A String", # The ID of the worker running this pipeline.
1272 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1273 #
1274 # When workers access Google Cloud APIs, they logically do so via
1275 # relative URLs. If this field is specified, it supplies the base
1276 # URL to use for resolving these relative URLs. The normative
1277 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1278 # Locators".
1279 #
1280 # If not specified, the default value is "http://www.googleapis.com/"
1281 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1282 # "dataflow/v1b3/projects".
1283 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1284 # storage.
1285 #
1286 # The supported resource type is:
1287 #
1288 # Google Cloud Storage:
1289 #
1290 # storage.googleapis.com/{bucket}/{object}
1291 # bucket.storage.googleapis.com/{object}
1292 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001293 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1294 # taskrunner; e.g. "wheel".
1295 "languageHint": "A String", # The suggested backend language.
1296 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1297 # console.
1298 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1299 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001300 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001301 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1302 #
1303 # When workers access Google Cloud APIs, they logically do so via
1304 # relative URLs. If this field is specified, it supplies the base
1305 # URL to use for resolving these relative URLs. The normative
1306 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1307 # Locators".
1308 #
1309 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001310 "harnessCommand": "A String", # The command to launch the worker harness.
1311 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1312 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001313 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001314 # The supported resource type is:
1315 #
1316 # Google Cloud Storage:
1317 # storage.googleapis.com/{bucket}/{object}
1318 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001319 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001320 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1321 # select a default set of packages which are useful to worker
1322 # harnesses written in a particular language.
1323 "packages": [ # Packages to be installed on workers.
1324 { # The packages that must be installed in order for a worker to run the
1325 # steps of the Cloud Dataflow job that will be assigned to its worker
1326 # pool.
1327 #
1328 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1329 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1330 # might use this to install jars containing the user's code and all of the
1331 # various dependencies (libraries, data files, etc.) required in order
1332 # for that code to run.
1333 "name": "A String", # The name of the package.
1334 "location": "A String", # The resource to read the package from. The supported resource type is:
1335 #
1336 # Google Cloud Storage:
1337 #
1338 # storage.googleapis.com/{bucket}
1339 # bucket.storage.googleapis.com/
1340 },
1341 ],
1342 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1343 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1344 "algorithm": "A String", # The algorithm to use for autoscaling.
1345 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001346 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1347 { # Describes the data disk used by a workflow job.
1348 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001349 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1350 # attempt to choose a reasonable default.
1351 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1352 # must be a disk type appropriate to the project and zone in which
1353 # the workers will run. If unknown or unspecified, the service
1354 # will attempt to choose a reasonable default.
1355 #
1356 # For example, the standard persistent disk type is a resource name
1357 # typically ending in "pd-standard". If SSD persistent disks are
1358 # available, the resource name typically ends with "pd-ssd". The
1359 # actual valid values are defined by the Google Compute Engine API,
1360 # not by the Cloud Dataflow API; consult the Google Compute Engine
1361 # documentation for more information about determining the set of
1362 # available disk types for a particular project and zone.
1363 #
1364 # Google Compute Engine Disk types are local to a particular
1365 # project in a particular zone, and so the resource name will
1366 # typically look something like this:
1367 #
1368 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001369 },
1370 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07001371 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001372 # attempt to choose a reasonable default.
1373 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1374 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001375 },
1376 ],
1377 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001378 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1379 # A description of the user pipeline and stages through which it is executed.
1380 # Created by Cloud Dataflow service. Only retrieved with
1381 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1382 # form. This data is provided by the Dataflow service for ease of visualizing
1383 # the pipeline and interpreting Dataflow provided metrics.
1384 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1385 { # Description of the type, names/ids, and input/outputs for a transform.
1386 "kind": "A String", # Type of transform.
1387 "name": "A String", # User provided name for this transform instance.
1388 "inputCollectionName": [ # User names for all collection inputs to this transform.
1389 "A String",
1390 ],
1391 "displayData": [ # Transform-specific display data.
1392 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07001393 "key": "A String", # The key identifying the display data.
1394 # This is intended to be used as a label for the display data
1395 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001396 "shortStrValue": "A String", # A possible additional shorter value to display.
1397 # For example a java_class_name_value of com.mypackage.MyDoFn
1398 # will be stored with MyDoFn as the short_str_value and
1399 # com.mypackage.MyDoFn as the java_class_name value.
1400 # short_str_value can be displayed and java_class_name_value
1401 # will be displayed as a tooltip.
1402 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1403 "url": "A String", # An optional full URL.
1404 "floatValue": 3.14, # Contains value if the data is of float type.
1405 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1406 # language namespace (i.e. python module) which defines the display data.
1407 # This allows a dax monitoring system to specially handle the data
1408 # and perform custom rendering.
1409 "javaClassValue": "A String", # Contains value if the data is of java class type.
1410 "label": "A String", # An optional label to display in a dax UI for the element.
1411 "boolValue": True or False, # Contains value if the data is of a boolean type.
1412 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07001413 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001414 "int64Value": "A String", # Contains value if the data is of int64 type.
1415 },
1416 ],
1417 "outputCollectionName": [ # User names for all collection outputs to this transform.
1418 "A String",
1419 ],
1420 "id": "A String", # SDK generated id of this transform instance.
1421 },
1422 ],
1423 "displayData": [ # Pipeline level display data.
1424 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07001425 "key": "A String", # The key identifying the display data.
1426 # This is intended to be used as a label for the display data
1427 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001428 "shortStrValue": "A String", # A possible additional shorter value to display.
1429 # For example a java_class_name_value of com.mypackage.MyDoFn
1430 # will be stored with MyDoFn as the short_str_value and
1431 # com.mypackage.MyDoFn as the java_class_name value.
1432 # short_str_value can be displayed and java_class_name_value
1433 # will be displayed as a tooltip.
1434 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1435 "url": "A String", # An optional full URL.
1436 "floatValue": 3.14, # Contains value if the data is of float type.
1437 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1438 # language namespace (i.e. python module) which defines the display data.
1439 # This allows a dax monitoring system to specially handle the data
1440 # and perform custom rendering.
1441 "javaClassValue": "A String", # Contains value if the data is of java class type.
1442 "label": "A String", # An optional label to display in a dax UI for the element.
1443 "boolValue": True or False, # Contains value if the data is of a boolean type.
1444 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07001445 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001446 "int64Value": "A String", # Contains value if the data is of int64 type.
1447 },
1448 ],
1449 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1450 { # Description of the composing transforms, names/ids, and input/outputs of a
1451 # stage of execution. Some composing transforms and sources may have been
1452 # generated by the Dataflow service during execution planning.
1453 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1454 { # Description of an interstitial value between transforms in an execution
1455 # stage.
1456 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1457 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1458 # source is most closely associated.
1459 "name": "A String", # Dataflow service generated name for this source.
1460 },
1461 ],
1462 "kind": "A String", # Type of transform this stage is executing.
1463 "name": "A String", # Dataflow service generated name for this stage.
1464 "outputSource": [ # Output sources for this stage.
1465 { # Description of an input or output of an execution stage.
1466 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1467 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1468 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07001469 "name": "A String", # Dataflow service generated name for this source.
1470 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001471 },
1472 ],
1473 "inputSource": [ # Input sources for this stage.
1474 { # Description of an input or output of an execution stage.
1475 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1476 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1477 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07001478 "name": "A String", # Dataflow service generated name for this source.
1479 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001480 },
1481 ],
1482 "componentTransform": [ # Transforms that comprise this execution stage.
1483 { # Description of a transform executed as part of an execution stage.
1484 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1485 "originalTransform": "A String", # User name for the original user transform with which this transform is
1486 # most closely associated.
1487 "name": "A String", # Dataflow service generated name for this source.
1488 },
1489 ],
1490 "id": "A String", # Dataflow service generated id for this stage.
1491 },
1492 ],
1493 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001494 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001495 { # Defines a particular step within a Cloud Dataflow job.
1496 #
1497 # A job consists of multiple steps, each of which performs some
1498 # specific operation as part of the overall job. Data is typically
1499 # passed from one step to another as part of the job.
1500 #
1501 # Here's an example of a sequence of steps which together implement a
1502 # Map-Reduce job:
1503 #
1504 # * Read a collection of data from some source, parsing the
1505 # collection's elements.
1506 #
1507 # * Validate the elements.
1508 #
1509 # * Apply a user-defined function to map each element to some value
1510 # and extract an element-specific key value.
1511 #
1512 # * Group elements with the same key into a single element with
1513 # that key, transforming a multiply-keyed collection into a
1514 # uniquely-keyed collection.
1515 #
1516 # * Write the elements out to some data sink.
1517 #
1518 # Note that the Cloud Dataflow service may be used to run many different
1519 # types of jobs, not just Map-Reduce.
1520 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1521 "properties": { # Named properties associated with the step. Each kind of
1522 # predefined step has its own required set of properties.
1523 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001524 "a_key": "", # Properties of the object.
1525 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001526 "name": "A String", # The name that identifies the step. This must be unique for each
1527 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001528 },
1529 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07001530 "currentState": "A String", # The current state of the job.
1531 #
1532 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1533 # specified.
1534 #
1535 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1536 # terminal state. After a job has reached a terminal state, no
1537 # further state updates may be made.
1538 #
1539 # This field may be mutated by the Cloud Dataflow service;
1540 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001541 "tempFiles": [ # A set of files the system should be aware of that are used
1542 # for temporary storage. These temporary files will be
1543 # removed on job completion.
1544 # No duplicates are allowed.
1545 # No file patterns are supported.
1546 #
1547 # The supported files are:
1548 #
1549 # Google Cloud Storage:
1550 #
1551 # storage.googleapis.com/{bucket}/{object}
1552 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001553 "A String",
1554 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001555 "type": "A String", # The type of Cloud Dataflow job.
1556 "id": "A String", # The unique ID of this job.
1557 #
1558 # This field is set by the Cloud Dataflow service when the Job is
1559 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -07001560 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1561 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001562 #
Thomas Coffee2f245372017-03-27 10:39:26 -07001563 # When sending a `CreateJobRequest`, you can update a job by specifying it
1564 # here. The job named here is stopped, and its intermediate state is
1565 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001566 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1567 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001568 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001569 "a_key": { # Contains information about how a particular
1570 # google.dataflow.v1beta3.Step will be executed.
1571 "stepName": [ # The steps associated with the execution stage.
1572 # Note that stages may have several steps, and that a given step
1573 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001574 "A String",
1575 ],
1576 },
1577 },
1578 },
1579 }</pre>
1580</div>
1581
1582<div class="method">
1583 <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1584 <pre>Request the job status.
1585
1586Args:
1587 projectId: string, A project id. (required)
1588 location: string, The location which contains the job specified by job_id. (required)
1589 jobId: string, The job to get metrics for. (required)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001590 startTime: string, Return only metric data that has changed since this time.
1591Default is to return all information about all metrics for the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001592 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001593 Allowed values
1594 1 - v1 error format
1595 2 - v2 error format
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001596
1597Returns:
1598 An object of the form:
1599
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001600 { # JobMetrics contains a collection of metrics describing the detailed progress
1601 # of a Dataflow job. Metrics correspond to user-defined and system-defined
1602 # metrics in the job.
1603 #
1604 # This resource captures only the most recent values of each metric;
1605 # time-series data can be queried for them (under the same metric names)
1606 # from Cloud Monitoring.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001607 "metrics": [ # All metrics for this job.
1608 { # Describes the state of a metric.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001609 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1610 # This holds the count of the aggregated values and is used in combination
1611 # with mean_sum above to obtain the actual mean aggregate value.
1612 # The only possible value type is Long.
1613 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
1614 # "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".
1615 # The specified aggregation kind is case-insensitive.
1616 #
1617 # If omitted, this is not an aggregated value but instead
1618 # a single metric sample value.
1619 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
1620 # possible value type is a list of Values whose type can be Long, Double,
1621 # or String, according to the metric's type. All Values in the list must
1622 # be of the same type.
1623 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1624 # metric.
1625 "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
1626 # will be "dataflow" for metrics defined by the Dataflow service or SDK.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001627 "name": "A String", # Worker-defined metric name.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001628 "context": { # Zero or more labeled fields which identify the part of the job this
1629 # metric is associated with, such as the name of a step or collection.
1630 #
1631 # For example, built-in counters associated with steps will have
1632 # context['step'] = &lt;step-name&gt;. Counters associated with PCollections
1633 # in the SDK will have context['pcollection'] = &lt;pcollection-name&gt;.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001634 "a_key": "A String",
1635 },
1636 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001637 "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1638 # This holds the sum of the aggregated values and is used in combination
1639 # with meanCount to obtain the actual mean aggregate value.
1640 # The only possible value types are Long and Double.
1641 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1642 # value accumulated since the worker started working on this WorkItem.
1643 # By default this is false, indicating that this metric is reported
1644 # as a delta that is not associated with any WorkItem.
1645 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1646 # reporting work progress; it will be filled in responses from the
1647 # metrics API.
1648 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1649 # "And", and "Or". The possible value types are Long, Double, and Boolean.
1650 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1651 # service.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001652 },
1653 ],
1654 "metricTime": "A String", # Timestamp as of which metric values are current.
1655 }</pre>
1656</div>
1657
1658<div class="method">
1659 <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001660 <pre>List the jobs of a project.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001661
1662Args:
1663 projectId: string, The project which owns the jobs. (required)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001664 location: string, The location that contains this job. (required)
1665 pageSize: integer, If there are many jobs, limit response to at most this many.
1666The actual number of jobs returned will be the lesser of pageSize
1667and an unspecified server-defined limit.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001668 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001669 Allowed values
1670 1 - v1 error format
1671 2 - v2 error format
1672 pageToken: string, Set this to the 'next_page_token' field of a previous response
1673to request additional results in a long list.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001674 filter: string, The kind of filter to use.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001675 view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001676
1677Returns:
1678 An object of the form:
1679
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001680 { # Response to a request to list Cloud Dataflow jobs. This may be a partial
1681 # response, depending on the page size in the ListJobsRequest.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001682 "nextPageToken": "A String", # Set if there may be more results than fit in this response.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001683 "jobs": [ # A subset of the requested job information.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001684 { # Defines a job to be run by the Cloud Dataflow service.
1685 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1686 # If this field is set, the service will ensure its uniqueness.
1687 # The request to create a job will fail if the service has knowledge of a
1688 # previously submitted job with the same client's ID and job name.
1689 # The caller may use this field to ensure idempotence of job
1690 # creation across retried attempts to create a job.
1691 # By default, the field is empty and, in that case, the service ignores it.
1692 "requestedState": "A String", # The job's requested state.
1693 #
1694 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1695 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1696 # also be used to directly set a job's requested state to
1697 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1698 # job if it has not already reached a terminal state.
1699 "name": "A String", # The user-specified Cloud Dataflow job name.
1700 #
1701 # Only one Job with a given name may exist in a project at any
1702 # given time. If a caller attempts to create a Job with the same
1703 # name as an already-existing Job, the attempt returns the
1704 # existing Job.
1705 #
1706 # The name must match the regular expression
1707 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1708 "currentStateTime": "A String", # The timestamp associated with the current state.
1709 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1710 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1711 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1712 "labels": { # User-defined labels for this job.
1713 #
1714 # The labels map can contain no more than 64 entries. Entries of the labels
1715 # map are UTF8 strings that comply with the following restrictions:
1716 #
1717 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1718 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1719 # * Both keys and values are additionally constrained to be <= 128 bytes in
1720 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001721 "a_key": "A String",
1722 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001723 "location": "A String", # The location that contains this job.
1724 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1725 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001726 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1727 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001728 "a_key": "A String",
1729 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001730 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1731 "version": { # A structure describing which components and their versions of the service
1732 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001733 "a_key": "", # Properties of the object.
1734 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001735 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1736 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1737 # this resource prefix, where {JOBNAME} is the value of the
1738 # job_name field. The resulting bucket and object prefix is used
1739 # as the prefix of the resources used to store temporary data
1740 # needed during the job execution. NOTE: This will override the
1741 # value in taskrunner_settings.
1742 # The supported resource type is:
1743 #
1744 # Google Cloud Storage:
1745 #
1746 # storage.googleapis.com/{bucket}/{object}
1747 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001748 "internalExperiments": { # Experimental settings.
1749 "a_key": "", # Properties of the object. Contains field @type with type URL.
1750 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001751 "dataset": "A String", # The dataset for the current project where various workflow
1752 # related tables are stored.
1753 #
1754 # The supported resource type is:
1755 #
1756 # Google BigQuery:
1757 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001758 "experiments": [ # The list of experiments to enable.
1759 "A String",
1760 ],
1761 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001762 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1763 # options are passed through the service and are used to recreate the
1764 # SDK pipeline options on the worker in a language agnostic and platform
1765 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001766 "a_key": "", # Properties of the object.
1767 },
1768 "userAgent": { # A description of the process that generated the request.
1769 "a_key": "", # Properties of the object.
1770 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001771 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1772 # unspecified, the service will attempt to choose a reasonable
1773 # default. This should be in the form of the API service name,
1774 # e.g. "compute.googleapis.com".
1775 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1776 # specified in order for the job to have workers.
1777 { # Describes one particular pool of Cloud Dataflow workers to be
1778 # instantiated by the Cloud Dataflow service in order to perform the
1779 # computations required by a job. Note that a workflow job may use
1780 # multiple pools, in order to match the various computational
1781 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001782 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -07001783 "ipConfiguration": "A String", # Configuration for VM IPs.
1784 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1785 # are supported.
1786 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1787 # service will attempt to choose a reasonable default.
1788 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1789 # the service will use the network "default".
1790 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1791 # will attempt to choose a reasonable default.
1792 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1793 # attempt to choose a reasonable default.
1794 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1795 "a_key": "A String",
1796 },
1797 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1798 # Compute Engine API.
1799 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1800 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1801 # `TEARDOWN_NEVER`.
1802 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1803 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1804 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1805 # down.
1806 #
1807 # If the workers are not torn down by the service, they will
1808 # continue to run and use Google Compute Engine VM resources in the
1809 # user's project until they are explicitly terminated by the user.
1810 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1811 # policy except for small, manually supervised test jobs.
1812 #
1813 # If unknown or unspecified, the service will attempt to choose a reasonable
1814 # default.
1815 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1816 # service will choose a number of threads (according to the number of cores
1817 # on the selected machine type for batch, or 1 by convention for streaming).
1818 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1819 # the form "regions/REGION/subnetworks/SUBNETWORK".
1820 "poolArgs": { # Extra arguments for this worker pool.
1821 "a_key": "", # Properties of the object. Contains field @type with type URL.
1822 },
1823 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1824 # execute the job. If zero or unspecified, the service will
1825 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001826 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1827 # using the standard Dataflow task runner. Users should ignore
1828 # this field.
1829 "workflowFileName": "A String", # The file to store the workflow in.
1830 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1831 # will not be uploaded.
1832 #
1833 # The supported resource type is:
1834 #
1835 # Google Cloud Storage:
1836 # storage.googleapis.com/{bucket}/{object}
1837 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001838 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1839 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001840 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1841 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1842 "vmId": "A String", # The ID string of the VM.
1843 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1844 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001845 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1846 # access the Cloud Dataflow API.
1847 "A String",
1848 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001849 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1850 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1851 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1852 # "shuffle/v1beta1".
1853 "workerId": "A String", # The ID of the worker running this pipeline.
1854 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1855 #
1856 # When workers access Google Cloud APIs, they logically do so via
1857 # relative URLs. If this field is specified, it supplies the base
1858 # URL to use for resolving these relative URLs. The normative
1859 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1860 # Locators".
1861 #
1862 # If not specified, the default value is "http://www.googleapis.com/"
1863 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1864 # "dataflow/v1b3/projects".
1865 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1866 # storage.
1867 #
1868 # The supported resource type is:
1869 #
1870 # Google Cloud Storage:
1871 #
1872 # storage.googleapis.com/{bucket}/{object}
1873 # bucket.storage.googleapis.com/{object}
1874 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001875 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1876 # taskrunner; e.g. "wheel".
1877 "languageHint": "A String", # The suggested backend language.
1878 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1879 # console.
1880 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1881 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001882 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001883 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1884 #
1885 # When workers access Google Cloud APIs, they logically do so via
1886 # relative URLs. If this field is specified, it supplies the base
1887 # URL to use for resolving these relative URLs. The normative
1888 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1889 # Locators".
1890 #
1891 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001892 "harnessCommand": "A String", # The command to launch the worker harness.
1893 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1894 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001895 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001896 # The supported resource type is:
1897 #
1898 # Google Cloud Storage:
1899 # storage.googleapis.com/{bucket}/{object}
1900 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001901 },
Thomas Coffee2f245372017-03-27 10:39:26 -07001902 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1903 # select a default set of packages which are useful to worker
1904 # harnesses written in a particular language.
1905 "packages": [ # Packages to be installed on workers.
1906 { # The packages that must be installed in order for a worker to run the
1907 # steps of the Cloud Dataflow job that will be assigned to its worker
1908 # pool.
1909 #
1910 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1911 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1912 # might use this to install jars containing the user's code and all of the
1913 # various dependencies (libraries, data files, etc.) required in order
1914 # for that code to run.
1915 "name": "A String", # The name of the package.
1916 "location": "A String", # The resource to read the package from. The supported resource type is:
1917 #
1918 # Google Cloud Storage:
1919 #
1920 # storage.googleapis.com/{bucket}
1921 # bucket.storage.googleapis.com/
1922 },
1923 ],
1924 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1925 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1926 "algorithm": "A String", # The algorithm to use for autoscaling.
1927 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001928 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1929 { # Describes the data disk used by a workflow job.
1930 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001931 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1932 # attempt to choose a reasonable default.
1933 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1934 # must be a disk type appropriate to the project and zone in which
1935 # the workers will run. If unknown or unspecified, the service
1936 # will attempt to choose a reasonable default.
1937 #
1938 # For example, the standard persistent disk type is a resource name
1939 # typically ending in "pd-standard". If SSD persistent disks are
1940 # available, the resource name typically ends with "pd-ssd". The
1941 # actual valid values are defined by the Google Compute Engine API,
1942 # not by the Cloud Dataflow API; consult the Google Compute Engine
1943 # documentation for more information about determining the set of
1944 # available disk types for a particular project and zone.
1945 #
1946 # Google Compute Engine Disk types are local to a particular
1947 # project in a particular zone, and so the resource name will
1948 # typically look something like this:
1949 #
1950 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001951 },
1952 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07001953 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001954 # attempt to choose a reasonable default.
1955 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1956 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001957 },
1958 ],
1959 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001960 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1961 # A description of the user pipeline and stages through which it is executed.
1962 # Created by Cloud Dataflow service. Only retrieved with
1963 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1964 # form. This data is provided by the Dataflow service for ease of visualizing
1965 # the pipeline and interpreting Dataflow provided metrics.
1966 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1967 { # Description of the type, names/ids, and input/outputs for a transform.
1968 "kind": "A String", # Type of transform.
1969 "name": "A String", # User provided name for this transform instance.
1970 "inputCollectionName": [ # User names for all collection inputs to this transform.
1971 "A String",
1972 ],
1973 "displayData": [ # Transform-specific display data.
1974 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07001975 "key": "A String", # The key identifying the display data.
1976 # This is intended to be used as a label for the display data
1977 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001978 "shortStrValue": "A String", # A possible additional shorter value to display.
1979 # For example a java_class_name_value of com.mypackage.MyDoFn
1980 # will be stored with MyDoFn as the short_str_value and
1981 # com.mypackage.MyDoFn as the java_class_name value.
1982 # short_str_value can be displayed and java_class_name_value
1983 # will be displayed as a tooltip.
1984 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1985 "url": "A String", # An optional full URL.
1986 "floatValue": 3.14, # Contains value if the data is of float type.
1987 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1988 # language namespace (i.e. python module) which defines the display data.
1989 # This allows a dax monitoring system to specially handle the data
1990 # and perform custom rendering.
1991 "javaClassValue": "A String", # Contains value if the data is of java class type.
1992 "label": "A String", # An optional label to display in a dax UI for the element.
1993 "boolValue": True or False, # Contains value if the data is of a boolean type.
1994 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07001995 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001996 "int64Value": "A String", # Contains value if the data is of int64 type.
1997 },
1998 ],
1999 "outputCollectionName": [ # User names for all collection outputs to this transform.
2000 "A String",
2001 ],
2002 "id": "A String", # SDK generated id of this transform instance.
2003 },
2004 ],
2005 "displayData": [ # Pipeline level display data.
2006 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07002007 "key": "A String", # The key identifying the display data.
2008 # This is intended to be used as a label for the display data
2009 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002010 "shortStrValue": "A String", # A possible additional shorter value to display.
2011 # For example a java_class_name_value of com.mypackage.MyDoFn
2012 # will be stored with MyDoFn as the short_str_value and
2013 # com.mypackage.MyDoFn as the java_class_name value.
2014 # short_str_value can be displayed and java_class_name_value
2015 # will be displayed as a tooltip.
2016 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2017 "url": "A String", # An optional full URL.
2018 "floatValue": 3.14, # Contains value if the data is of float type.
2019 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2020 # language namespace (i.e. python module) which defines the display data.
2021 # This allows a dax monitoring system to specially handle the data
2022 # and perform custom rendering.
2023 "javaClassValue": "A String", # Contains value if the data is of java class type.
2024 "label": "A String", # An optional label to display in a dax UI for the element.
2025 "boolValue": True or False, # Contains value if the data is of a boolean type.
2026 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07002027 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002028 "int64Value": "A String", # Contains value if the data is of int64 type.
2029 },
2030 ],
2031 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2032 { # Description of the composing transforms, names/ids, and input/outputs of a
2033 # stage of execution. Some composing transforms and sources may have been
2034 # generated by the Dataflow service during execution planning.
2035 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2036 { # Description of an interstitial value between transforms in an execution
2037 # stage.
2038 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2039 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2040 # source is most closely associated.
2041 "name": "A String", # Dataflow service generated name for this source.
2042 },
2043 ],
2044 "kind": "A String", # Type of transform this stage is executing.
2045 "name": "A String", # Dataflow service generated name for this stage.
2046 "outputSource": [ # Output sources for this stage.
2047 { # Description of an input or output of an execution stage.
2048 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2049 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2050 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07002051 "name": "A String", # Dataflow service generated name for this source.
2052 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002053 },
2054 ],
2055 "inputSource": [ # Input sources for this stage.
2056 { # Description of an input or output of an execution stage.
2057 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2058 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2059 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07002060 "name": "A String", # Dataflow service generated name for this source.
2061 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002062 },
2063 ],
2064 "componentTransform": [ # Transforms that comprise this execution stage.
2065 { # Description of a transform executed as part of an execution stage.
2066 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2067 "originalTransform": "A String", # User name for the original user transform with which this transform is
2068 # most closely associated.
2069 "name": "A String", # Dataflow service generated name for this source.
2070 },
2071 ],
2072 "id": "A String", # Dataflow service generated id for this stage.
2073 },
2074 ],
2075 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002076 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002077 { # Defines a particular step within a Cloud Dataflow job.
2078 #
2079 # A job consists of multiple steps, each of which performs some
2080 # specific operation as part of the overall job. Data is typically
2081 # passed from one step to another as part of the job.
2082 #
2083 # Here's an example of a sequence of steps which together implement a
2084 # Map-Reduce job:
2085 #
2086 # * Read a collection of data from some source, parsing the
2087 # collection's elements.
2088 #
2089 # * Validate the elements.
2090 #
2091 # * Apply a user-defined function to map each element to some value
2092 # and extract an element-specific key value.
2093 #
2094 # * Group elements with the same key into a single element with
2095 # that key, transforming a multiply-keyed collection into a
2096 # uniquely-keyed collection.
2097 #
2098 # * Write the elements out to some data sink.
2099 #
2100 # Note that the Cloud Dataflow service may be used to run many different
2101 # types of jobs, not just Map-Reduce.
2102 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2103 "properties": { # Named properties associated with the step. Each kind of
2104 # predefined step has its own required set of properties.
2105 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002106 "a_key": "", # Properties of the object.
2107 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002108 "name": "A String", # The name that identifies the step. This must be unique for each
2109 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002110 },
2111 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07002112 "currentState": "A String", # The current state of the job.
2113 #
2114 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2115 # specified.
2116 #
2117 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2118 # terminal state. After a job has reached a terminal state, no
2119 # further state updates may be made.
2120 #
2121 # This field may be mutated by the Cloud Dataflow service;
2122 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002123 "tempFiles": [ # A set of files the system should be aware of that are used
2124 # for temporary storage. These temporary files will be
2125 # removed on job completion.
2126 # No duplicates are allowed.
2127 # No file patterns are supported.
2128 #
2129 # The supported files are:
2130 #
2131 # Google Cloud Storage:
2132 #
2133 # storage.googleapis.com/{bucket}/{object}
2134 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002135 "A String",
2136 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002137 "type": "A String", # The type of Cloud Dataflow job.
2138 "id": "A String", # The unique ID of this job.
2139 #
2140 # This field is set by the Cloud Dataflow service when the Job is
2141 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -07002142 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2143 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002144 #
Thomas Coffee2f245372017-03-27 10:39:26 -07002145 # When sending a `CreateJobRequest`, you can update a job by specifying it
2146 # here. The job named here is stopped, and its intermediate state is
2147 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002148 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2149 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002150 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002151 "a_key": { # Contains information about how a particular
2152 # google.dataflow.v1beta3.Step will be executed.
2153 "stepName": [ # The steps associated with the execution stage.
2154 # Note that stages may have several steps, and that a given step
2155 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002156 "A String",
2157 ],
2158 },
2159 },
2160 },
2161 },
2162 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002163 "failedLocation": [ # Zero or more messages describing locations that failed to respond.
2164 { # Indicates which location failed to respond to a request for data.
2165 "name": "A String", # The name of the failed location.
2166 },
2167 ],
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002168 }</pre>
2169</div>
2170
2171<div class="method">
2172 <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2173 <pre>Retrieves the next page of results.
2174
2175Args:
2176 previous_request: The request for the previous page. (required)
2177 previous_response: The response from the request for the previous page. (required)
2178
2179Returns:
2180 A request object that you can call 'execute()' on to request the next
2181 page. Returns None if there are no more items in the collection.
2182 </pre>
2183</div>
2184
2185<div class="method">
2186 <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002187 <pre>Updates the state of an existing Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002188
2189Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002190 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2191 location: string, The location that contains this job. (required)
2192 jobId: string, The job ID. (required)
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002193 body: object, The request body. (required)
2194 The object takes the form of:
2195
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002196{ # Defines a job to be run by the Cloud Dataflow service.
2197 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2198 # If this field is set, the service will ensure its uniqueness.
2199 # The request to create a job will fail if the service has knowledge of a
2200 # previously submitted job with the same client's ID and job name.
2201 # The caller may use this field to ensure idempotence of job
2202 # creation across retried attempts to create a job.
2203 # By default, the field is empty and, in that case, the service ignores it.
2204 "requestedState": "A String", # The job's requested state.
2205 #
2206 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2207 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2208 # also be used to directly set a job's requested state to
2209 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2210 # job if it has not already reached a terminal state.
2211 "name": "A String", # The user-specified Cloud Dataflow job name.
2212 #
2213 # Only one Job with a given name may exist in a project at any
2214 # given time. If a caller attempts to create a Job with the same
2215 # name as an already-existing Job, the attempt returns the
2216 # existing Job.
2217 #
2218 # The name must match the regular expression
2219 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2220 "currentStateTime": "A String", # The timestamp associated with the current state.
2221 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2222 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2223 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2224 "labels": { # User-defined labels for this job.
2225 #
2226 # The labels map can contain no more than 64 entries. Entries of the labels
2227 # map are UTF8 strings that comply with the following restrictions:
2228 #
2229 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2230 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2231 # * Both keys and values are additionally constrained to be <= 128 bytes in
2232 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002233 "a_key": "A String",
2234 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002235 "location": "A String", # The location that contains this job.
2236 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2237 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002238 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2239 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002240 "a_key": "A String",
2241 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002242 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2243 "version": { # A structure describing which components and their versions of the service
2244 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002245 "a_key": "", # Properties of the object.
2246 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002247 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2248 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2249 # this resource prefix, where {JOBNAME} is the value of the
2250 # job_name field. The resulting bucket and object prefix is used
2251 # as the prefix of the resources used to store temporary data
2252 # needed during the job execution. NOTE: This will override the
2253 # value in taskrunner_settings.
2254 # The supported resource type is:
2255 #
2256 # Google Cloud Storage:
2257 #
2258 # storage.googleapis.com/{bucket}/{object}
2259 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002260 "internalExperiments": { # Experimental settings.
2261 "a_key": "", # Properties of the object. Contains field @type with type URL.
2262 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002263 "dataset": "A String", # The dataset for the current project where various workflow
2264 # related tables are stored.
2265 #
2266 # The supported resource type is:
2267 #
2268 # Google BigQuery:
2269 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002270 "experiments": [ # The list of experiments to enable.
2271 "A String",
2272 ],
2273 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002274 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2275 # options are passed through the service and are used to recreate the
2276 # SDK pipeline options on the worker in a language agnostic and platform
2277 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002278 "a_key": "", # Properties of the object.
2279 },
2280 "userAgent": { # A description of the process that generated the request.
2281 "a_key": "", # Properties of the object.
2282 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002283 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2284 # unspecified, the service will attempt to choose a reasonable
2285 # default. This should be in the form of the API service name,
2286 # e.g. "compute.googleapis.com".
2287 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2288 # specified in order for the job to have workers.
2289 { # Describes one particular pool of Cloud Dataflow workers to be
2290 # instantiated by the Cloud Dataflow service in order to perform the
2291 # computations required by a job. Note that a workflow job may use
2292 # multiple pools, in order to match the various computational
2293 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002294 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -07002295 "ipConfiguration": "A String", # Configuration for VM IPs.
2296 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2297 # are supported.
2298 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2299 # service will attempt to choose a reasonable default.
2300 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2301 # the service will use the network "default".
2302 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2303 # will attempt to choose a reasonable default.
2304 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2305 # attempt to choose a reasonable default.
2306 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2307 "a_key": "A String",
2308 },
2309 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2310 # Compute Engine API.
2311 "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
2312 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2313 # `TEARDOWN_NEVER`.
2314 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2315 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2316 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2317 # down.
2318 #
2319 # If the workers are not torn down by the service, they will
2320 # continue to run and use Google Compute Engine VM resources in the
2321 # user's project until they are explicitly terminated by the user.
2322 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2323 # policy except for small, manually supervised test jobs.
2324 #
2325 # If unknown or unspecified, the service will attempt to choose a reasonable
2326 # default.
2327 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2328 # service will choose a number of threads (according to the number of cores
2329 # on the selected machine type for batch, or 1 by convention for streaming).
2330 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2331 # the form "regions/REGION/subnetworks/SUBNETWORK".
2332 "poolArgs": { # Extra arguments for this worker pool.
2333 "a_key": "", # Properties of the object. Contains field @type with type URL.
2334 },
2335 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2336 # execute the job. If zero or unspecified, the service will
2337 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002338 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2339 # using the standard Dataflow task runner. Users should ignore
2340 # this field.
2341 "workflowFileName": "A String", # The file to store the workflow in.
2342 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2343 # will not be uploaded.
2344 #
2345 # The supported resource type is:
2346 #
2347 # Google Cloud Storage:
2348 # storage.googleapis.com/{bucket}/{object}
2349 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002350 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2351 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002352 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2353 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2354 "vmId": "A String", # The ID string of the VM.
2355 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2356 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002357 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2358 # access the Cloud Dataflow API.
2359 "A String",
2360 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002361 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2362 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2363 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2364 # "shuffle/v1beta1".
2365 "workerId": "A String", # The ID of the worker running this pipeline.
2366 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2367 #
2368 # When workers access Google Cloud APIs, they logically do so via
2369 # relative URLs. If this field is specified, it supplies the base
2370 # URL to use for resolving these relative URLs. The normative
2371 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2372 # Locators".
2373 #
2374 # If not specified, the default value is "http://www.googleapis.com/"
2375 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2376 # "dataflow/v1b3/projects".
2377 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2378 # storage.
2379 #
2380 # The supported resource type is:
2381 #
2382 # Google Cloud Storage:
2383 #
2384 # storage.googleapis.com/{bucket}/{object}
2385 # bucket.storage.googleapis.com/{object}
2386 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002387 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2388 # taskrunner; e.g. "wheel".
2389 "languageHint": "A String", # The suggested backend language.
2390 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2391 # console.
2392 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2393 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002394 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002395 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2396 #
2397 # When workers access Google Cloud APIs, they logically do so via
2398 # relative URLs. If this field is specified, it supplies the base
2399 # URL to use for resolving these relative URLs. The normative
2400 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2401 # Locators".
2402 #
2403 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002404 "harnessCommand": "A String", # The command to launch the worker harness.
2405 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2406 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002407 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002408 # The supported resource type is:
2409 #
2410 # Google Cloud Storage:
2411 # storage.googleapis.com/{bucket}/{object}
2412 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002413 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002414 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2415 # select a default set of packages which are useful to worker
2416 # harnesses written in a particular language.
2417 "packages": [ # Packages to be installed on workers.
2418 { # The packages that must be installed in order for a worker to run the
2419 # steps of the Cloud Dataflow job that will be assigned to its worker
2420 # pool.
2421 #
2422 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2423 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2424 # might use this to install jars containing the user's code and all of the
2425 # various dependencies (libraries, data files, etc.) required in order
2426 # for that code to run.
2427 "name": "A String", # The name of the package.
2428 "location": "A String", # The resource to read the package from. The supported resource type is:
2429 #
2430 # Google Cloud Storage:
2431 #
2432 # storage.googleapis.com/{bucket}
2433 # bucket.storage.googleapis.com/
2434 },
2435 ],
2436 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2437 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2438 "algorithm": "A String", # The algorithm to use for autoscaling.
2439 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002440 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2441 { # Describes the data disk used by a workflow job.
2442 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002443 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2444 # attempt to choose a reasonable default.
2445 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2446 # must be a disk type appropriate to the project and zone in which
2447 # the workers will run. If unknown or unspecified, the service
2448 # will attempt to choose a reasonable default.
2449 #
2450 # For example, the standard persistent disk type is a resource name
2451 # typically ending in "pd-standard". If SSD persistent disks are
2452 # available, the resource name typically ends with "pd-ssd". The
2453 # actual valid values are defined by the Google Compute Engine API,
2454 # not by the Cloud Dataflow API; consult the Google Compute Engine
2455 # documentation for more information about determining the set of
2456 # available disk types for a particular project and zone.
2457 #
2458 # Google Compute Engine Disk types are local to a particular
2459 # project in a particular zone, and so the resource name will
2460 # typically look something like this:
2461 #
2462 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002463 },
2464 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07002465 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002466 # attempt to choose a reasonable default.
2467 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2468 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002469 },
2470 ],
2471 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002472 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2473 # A description of the user pipeline and stages through which it is executed.
2474 # Created by Cloud Dataflow service. Only retrieved with
2475 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2476 # form. This data is provided by the Dataflow service for ease of visualizing
2477 # the pipeline and interpreting Dataflow provided metrics.
2478 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2479 { # Description of the type, names/ids, and input/outputs for a transform.
2480 "kind": "A String", # Type of transform.
2481 "name": "A String", # User provided name for this transform instance.
2482 "inputCollectionName": [ # User names for all collection inputs to this transform.
2483 "A String",
2484 ],
2485 "displayData": [ # Transform-specific display data.
2486 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07002487 "key": "A String", # The key identifying the display data.
2488 # This is intended to be used as a label for the display data
2489 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002490 "shortStrValue": "A String", # A possible additional shorter value to display.
2491 # For example a java_class_name_value of com.mypackage.MyDoFn
2492 # will be stored with MyDoFn as the short_str_value and
2493 # com.mypackage.MyDoFn as the java_class_name value.
2494 # short_str_value can be displayed and java_class_name_value
2495 # will be displayed as a tooltip.
2496 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2497 "url": "A String", # An optional full URL.
2498 "floatValue": 3.14, # Contains value if the data is of float type.
2499 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2500 # language namespace (i.e. python module) which defines the display data.
2501 # This allows a dax monitoring system to specially handle the data
2502 # and perform custom rendering.
2503 "javaClassValue": "A String", # Contains value if the data is of java class type.
2504 "label": "A String", # An optional label to display in a dax UI for the element.
2505 "boolValue": True or False, # Contains value if the data is of a boolean type.
2506 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07002507 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002508 "int64Value": "A String", # Contains value if the data is of int64 type.
2509 },
2510 ],
2511 "outputCollectionName": [ # User names for all collection outputs to this transform.
2512 "A String",
2513 ],
2514 "id": "A String", # SDK generated id of this transform instance.
2515 },
2516 ],
2517 "displayData": [ # Pipeline level display data.
2518 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07002519 "key": "A String", # The key identifying the display data.
2520 # This is intended to be used as a label for the display data
2521 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002522 "shortStrValue": "A String", # A possible additional shorter value to display.
2523 # For example a java_class_name_value of com.mypackage.MyDoFn
2524 # will be stored with MyDoFn as the short_str_value and
2525 # com.mypackage.MyDoFn as the java_class_name value.
2526 # short_str_value can be displayed and java_class_name_value
2527 # will be displayed as a tooltip.
2528 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2529 "url": "A String", # An optional full URL.
2530 "floatValue": 3.14, # Contains value if the data is of float type.
2531 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2532 # language namespace (i.e. python module) which defines the display data.
2533 # This allows a dax monitoring system to specially handle the data
2534 # and perform custom rendering.
2535 "javaClassValue": "A String", # Contains value if the data is of java class type.
2536 "label": "A String", # An optional label to display in a dax UI for the element.
2537 "boolValue": True or False, # Contains value if the data is of a boolean type.
2538 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07002539 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002540 "int64Value": "A String", # Contains value if the data is of int64 type.
2541 },
2542 ],
2543 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2544 { # Description of the composing transforms, names/ids, and input/outputs of a
2545 # stage of execution. Some composing transforms and sources may have been
2546 # generated by the Dataflow service during execution planning.
2547 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2548 { # Description of an interstitial value between transforms in an execution
2549 # stage.
2550 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2551 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2552 # source is most closely associated.
2553 "name": "A String", # Dataflow service generated name for this source.
2554 },
2555 ],
2556 "kind": "A String", # Type of transform this stage is executing.
2557 "name": "A String", # Dataflow service generated name for this stage.
2558 "outputSource": [ # Output sources for this stage.
2559 { # Description of an input or output of an execution stage.
2560 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2561 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2562 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07002563 "name": "A String", # Dataflow service generated name for this source.
2564 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002565 },
2566 ],
2567 "inputSource": [ # Input sources for this stage.
2568 { # Description of an input or output of an execution stage.
2569 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2570 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2571 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07002572 "name": "A String", # Dataflow service generated name for this source.
2573 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002574 },
2575 ],
2576 "componentTransform": [ # Transforms that comprise this execution stage.
2577 { # Description of a transform executed as part of an execution stage.
2578 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2579 "originalTransform": "A String", # User name for the original user transform with which this transform is
2580 # most closely associated.
2581 "name": "A String", # Dataflow service generated name for this source.
2582 },
2583 ],
2584 "id": "A String", # Dataflow service generated id for this stage.
2585 },
2586 ],
2587 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002588 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002589 { # Defines a particular step within a Cloud Dataflow job.
2590 #
2591 # A job consists of multiple steps, each of which performs some
2592 # specific operation as part of the overall job. Data is typically
2593 # passed from one step to another as part of the job.
2594 #
2595 # Here's an example of a sequence of steps which together implement a
2596 # Map-Reduce job:
2597 #
2598 # * Read a collection of data from some source, parsing the
2599 # collection's elements.
2600 #
2601 # * Validate the elements.
2602 #
2603 # * Apply a user-defined function to map each element to some value
2604 # and extract an element-specific key value.
2605 #
2606 # * Group elements with the same key into a single element with
2607 # that key, transforming a multiply-keyed collection into a
2608 # uniquely-keyed collection.
2609 #
2610 # * Write the elements out to some data sink.
2611 #
2612 # Note that the Cloud Dataflow service may be used to run many different
2613 # types of jobs, not just Map-Reduce.
2614 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2615 "properties": { # Named properties associated with the step. Each kind of
2616 # predefined step has its own required set of properties.
2617 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002618 "a_key": "", # Properties of the object.
2619 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002620 "name": "A String", # The name that identifies the step. This must be unique for each
2621 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002622 },
2623 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07002624 "currentState": "A String", # The current state of the job.
2625 #
2626 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2627 # specified.
2628 #
2629 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2630 # terminal state. After a job has reached a terminal state, no
2631 # further state updates may be made.
2632 #
2633 # This field may be mutated by the Cloud Dataflow service;
2634 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002635 "tempFiles": [ # A set of files the system should be aware of that are used
2636 # for temporary storage. These temporary files will be
2637 # removed on job completion.
2638 # No duplicates are allowed.
2639 # No file patterns are supported.
2640 #
2641 # The supported files are:
2642 #
2643 # Google Cloud Storage:
2644 #
2645 # storage.googleapis.com/{bucket}/{object}
2646 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002647 "A String",
2648 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002649 "type": "A String", # The type of Cloud Dataflow job.
2650 "id": "A String", # The unique ID of this job.
2651 #
2652 # This field is set by the Cloud Dataflow service when the Job is
2653 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -07002654 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2655 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002656 #
Thomas Coffee2f245372017-03-27 10:39:26 -07002657 # When sending a `CreateJobRequest`, you can update a job by specifying it
2658 # here. The job named here is stopped, and its intermediate state is
2659 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002660 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2661 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002662 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002663 "a_key": { # Contains information about how a particular
2664 # google.dataflow.v1beta3.Step will be executed.
2665 "stepName": [ # The steps associated with the execution stage.
2666 # Note that stages may have several steps, and that a given step
2667 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002668 "A String",
2669 ],
2670 },
2671 },
2672 },
2673 }
2674
2675 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002676 Allowed values
2677 1 - v1 error format
2678 2 - v2 error format
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002679
2680Returns:
2681 An object of the form:
2682
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002683 { # Defines a job to be run by the Cloud Dataflow service.
2684 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2685 # If this field is set, the service will ensure its uniqueness.
2686 # The request to create a job will fail if the service has knowledge of a
2687 # previously submitted job with the same client's ID and job name.
2688 # The caller may use this field to ensure idempotence of job
2689 # creation across retried attempts to create a job.
2690 # By default, the field is empty and, in that case, the service ignores it.
2691 "requestedState": "A String", # The job's requested state.
2692 #
2693 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2694 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2695 # also be used to directly set a job's requested state to
2696 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2697 # job if it has not already reached a terminal state.
2698 "name": "A String", # The user-specified Cloud Dataflow job name.
2699 #
2700 # Only one Job with a given name may exist in a project at any
2701 # given time. If a caller attempts to create a Job with the same
2702 # name as an already-existing Job, the attempt returns the
2703 # existing Job.
2704 #
2705 # The name must match the regular expression
2706 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2707 "currentStateTime": "A String", # The timestamp associated with the current state.
2708 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2709 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2710 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2711 "labels": { # User-defined labels for this job.
2712 #
2713 # The labels map can contain no more than 64 entries. Entries of the labels
2714 # map are UTF8 strings that comply with the following restrictions:
2715 #
2716 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2717 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2718 # * Both keys and values are additionally constrained to be <= 128 bytes in
2719 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002720 "a_key": "A String",
2721 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002722 "location": "A String", # The location that contains this job.
2723 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2724 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002725 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2726 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002727 "a_key": "A String",
2728 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002729 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2730 "version": { # A structure describing which components and their versions of the service
2731 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002732 "a_key": "", # Properties of the object.
2733 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002734 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2735 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2736 # this resource prefix, where {JOBNAME} is the value of the
2737 # job_name field. The resulting bucket and object prefix is used
2738 # as the prefix of the resources used to store temporary data
2739 # needed during the job execution. NOTE: This will override the
2740 # value in taskrunner_settings.
2741 # The supported resource type is:
2742 #
2743 # Google Cloud Storage:
2744 #
2745 # storage.googleapis.com/{bucket}/{object}
2746 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002747 "internalExperiments": { # Experimental settings.
2748 "a_key": "", # Properties of the object. Contains field @type with type URL.
2749 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002750 "dataset": "A String", # The dataset for the current project where various workflow
2751 # related tables are stored.
2752 #
2753 # The supported resource type is:
2754 #
2755 # Google BigQuery:
2756 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002757 "experiments": [ # The list of experiments to enable.
2758 "A String",
2759 ],
2760 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002761 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2762 # options are passed through the service and are used to recreate the
2763 # SDK pipeline options on the worker in a language agnostic and platform
2764 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002765 "a_key": "", # Properties of the object.
2766 },
2767 "userAgent": { # A description of the process that generated the request.
2768 "a_key": "", # Properties of the object.
2769 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002770 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2771 # unspecified, the service will attempt to choose a reasonable
2772 # default. This should be in the form of the API service name,
2773 # e.g. "compute.googleapis.com".
2774 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2775 # specified in order for the job to have workers.
2776 { # Describes one particular pool of Cloud Dataflow workers to be
2777 # instantiated by the Cloud Dataflow service in order to perform the
2778 # computations required by a job. Note that a workflow job may use
2779 # multiple pools, in order to match the various computational
2780 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002781 "diskSourceImage": "A String", # Fully qualified source image for disks.
Thomas Coffee2f245372017-03-27 10:39:26 -07002782 "ipConfiguration": "A String", # Configuration for VM IPs.
2783 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2784 # are supported.
2785 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2786 # service will attempt to choose a reasonable default.
2787 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2788 # the service will use the network "default".
2789 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2790 # will attempt to choose a reasonable default.
2791 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2792 # attempt to choose a reasonable default.
2793 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2794 "a_key": "A String",
2795 },
2796 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2797 # Compute Engine API.
2798 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
2799 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2800 # `TEARDOWN_NEVER`.
2801 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2802 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2803 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2804 # down.
2805 #
2806 # If the workers are not torn down by the service, they will
2807 # continue to run and use Google Compute Engine VM resources in the
2808 # user's project until they are explicitly terminated by the user.
2809 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2810 # policy except for small, manually supervised test jobs.
2811 #
2812 # If unknown or unspecified, the service will attempt to choose a reasonable
2813 # default.
2814 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2815 # service will choose a number of threads (according to the number of cores
2816 # on the selected machine type for batch, or 1 by convention for streaming).
2817 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2818 # the form "regions/REGION/subnetworks/SUBNETWORK".
2819 "poolArgs": { # Extra arguments for this worker pool.
2820 "a_key": "", # Properties of the object. Contains field @type with type URL.
2821 },
2822 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2823 # execute the job. If zero or unspecified, the service will
2824 # attempt to choose a reasonable default.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002825 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2826 # using the standard Dataflow task runner. Users should ignore
2827 # this field.
2828 "workflowFileName": "A String", # The file to store the workflow in.
2829 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2830 # will not be uploaded.
2831 #
2832 # The supported resource type is:
2833 #
2834 # Google Cloud Storage:
2835 # storage.googleapis.com/{bucket}/{object}
2836 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002837 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2838 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002839 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2840 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2841 "vmId": "A String", # The ID string of the VM.
2842 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2843 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002844 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2845 # access the Cloud Dataflow API.
2846 "A String",
2847 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002848 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2849 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2850 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2851 # "shuffle/v1beta1".
2852 "workerId": "A String", # The ID of the worker running this pipeline.
2853 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2854 #
2855 # When workers access Google Cloud APIs, they logically do so via
2856 # relative URLs. If this field is specified, it supplies the base
2857 # URL to use for resolving these relative URLs. The normative
2858 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2859 # Locators".
2860 #
2861 # If not specified, the default value is "http://www.googleapis.com/"
2862 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2863 # "dataflow/v1b3/projects".
2864 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2865 # storage.
2866 #
2867 # The supported resource type is:
2868 #
2869 # Google Cloud Storage:
2870 #
2871 # storage.googleapis.com/{bucket}/{object}
2872 # bucket.storage.googleapis.com/{object}
2873 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002874 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2875 # taskrunner; e.g. "wheel".
2876 "languageHint": "A String", # The suggested backend language.
2877 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2878 # console.
2879 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2880 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002881 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002882 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2883 #
2884 # When workers access Google Cloud APIs, they logically do so via
2885 # relative URLs. If this field is specified, it supplies the base
2886 # URL to use for resolving these relative URLs. The normative
2887 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2888 # Locators".
2889 #
2890 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002891 "harnessCommand": "A String", # The command to launch the worker harness.
2892 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2893 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002894 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002895 # The supported resource type is:
2896 #
2897 # Google Cloud Storage:
2898 # storage.googleapis.com/{bucket}/{object}
2899 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002900 },
Thomas Coffee2f245372017-03-27 10:39:26 -07002901 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2902 # select a default set of packages which are useful to worker
2903 # harnesses written in a particular language.
2904 "packages": [ # Packages to be installed on workers.
2905 { # The packages that must be installed in order for a worker to run the
2906 # steps of the Cloud Dataflow job that will be assigned to its worker
2907 # pool.
2908 #
2909 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2910 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2911 # might use this to install jars containing the user's code and all of the
2912 # various dependencies (libraries, data files, etc.) required in order
2913 # for that code to run.
2914 "name": "A String", # The name of the package.
2915 "location": "A String", # The resource to read the package from. The supported resource type is:
2916 #
2917 # Google Cloud Storage:
2918 #
2919 # storage.googleapis.com/{bucket}
2920 # bucket.storage.googleapis.com/
2921 },
2922 ],
2923 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2924 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2925 "algorithm": "A String", # The algorithm to use for autoscaling.
2926 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002927 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2928 { # Describes the data disk used by a workflow job.
2929 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002930 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2931 # attempt to choose a reasonable default.
2932 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2933 # must be a disk type appropriate to the project and zone in which
2934 # the workers will run. If unknown or unspecified, the service
2935 # will attempt to choose a reasonable default.
2936 #
2937 # For example, the standard persistent disk type is a resource name
2938 # typically ending in "pd-standard". If SSD persistent disks are
2939 # available, the resource name typically ends with "pd-ssd". The
2940 # actual valid values are defined by the Google Compute Engine API,
2941 # not by the Cloud Dataflow API; consult the Google Compute Engine
2942 # documentation for more information about determining the set of
2943 # available disk types for a particular project and zone.
2944 #
2945 # Google Compute Engine Disk types are local to a particular
2946 # project in a particular zone, and so the resource name will
2947 # typically look something like this:
2948 #
2949 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002950 },
2951 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07002952 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002953 # attempt to choose a reasonable default.
2954 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2955 # harness, residing in Google Container Registry.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002956 },
2957 ],
2958 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002959 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2960 # A description of the user pipeline and stages through which it is executed.
2961 # Created by Cloud Dataflow service. Only retrieved with
2962 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2963 # form. This data is provided by the Dataflow service for ease of visualizing
2964 # the pipeline and interpreting Dataflow provided metrics.
2965 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2966 { # Description of the type, names/ids, and input/outputs for a transform.
2967 "kind": "A String", # Type of transform.
2968 "name": "A String", # User provided name for this transform instance.
2969 "inputCollectionName": [ # User names for all collection inputs to this transform.
2970 "A String",
2971 ],
2972 "displayData": [ # Transform-specific display data.
2973 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07002974 "key": "A String", # The key identifying the display data.
2975 # This is intended to be used as a label for the display data
2976 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002977 "shortStrValue": "A String", # A possible additional shorter value to display.
2978 # For example a java_class_name_value of com.mypackage.MyDoFn
2979 # will be stored with MyDoFn as the short_str_value and
2980 # com.mypackage.MyDoFn as the java_class_name value.
2981 # short_str_value can be displayed and java_class_name_value
2982 # will be displayed as a tooltip.
2983 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2984 "url": "A String", # An optional full URL.
2985 "floatValue": 3.14, # Contains value if the data is of float type.
2986 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2987 # language namespace (e.g. Python module) which defines the display data.
2988 # This allows a dax monitoring system to specially handle the data
2989 # and perform custom rendering.
2990 "javaClassValue": "A String", # Contains value if the data is of java class type.
2991 "label": "A String", # An optional label to display in a dax UI for the element.
2992 "boolValue": True or False, # Contains value if the data is of a boolean type.
2993 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07002994 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002995 "int64Value": "A String", # Contains value if the data is of int64 type.
2996 },
2997 ],
2998 "outputCollectionName": [ # User names for all collection outputs to this transform.
2999 "A String",
3000 ],
3001 "id": "A String", # SDK generated id of this transform instance.
3002 },
3003 ],
3004 "displayData": [ # Pipeline level display data.
3005 { # Data provided with a pipeline or transform to provide descriptive info.
Thomas Coffee2f245372017-03-27 10:39:26 -07003006 "key": "A String", # The key identifying the display data.
3007 # This is intended to be used as a label for the display data
3008 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003009 "shortStrValue": "A String", # A possible additional shorter value to display.
3010 # For example a java_class_name_value of com.mypackage.MyDoFn
3011 # will be stored with MyDoFn as the short_str_value and
3012 # com.mypackage.MyDoFn as the java_class_name value.
3013 # short_str_value can be displayed and java_class_name_value
3014 # will be displayed as a tooltip.
3015 "timestampValue": "A String", # Contains value if the data is of timestamp type.
3016 "url": "A String", # An optional full URL.
3017 "floatValue": 3.14, # Contains value if the data is of float type.
3018 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3019 # language namespace (e.g. Python module) which defines the display data.
3020 # This allows a dax monitoring system to specially handle the data
3021 # and perform custom rendering.
3022 "javaClassValue": "A String", # Contains value if the data is of java class type.
3023 "label": "A String", # An optional label to display in a dax UI for the element.
3024 "boolValue": True or False, # Contains value if the data is of a boolean type.
3025 "strValue": "A String", # Contains value if the data is of string type.
Thomas Coffee2f245372017-03-27 10:39:26 -07003026 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003027 "int64Value": "A String", # Contains value if the data is of int64 type.
3028 },
3029 ],
3030 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3031 { # Description of the composing transforms, names/ids, and input/outputs of a
3032 # stage of execution. Some composing transforms and sources may have been
3033 # generated by the Dataflow service during execution planning.
3034 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3035 { # Description of an interstitial value between transforms in an execution
3036 # stage.
3037 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3038 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3039 # source is most closely associated.
3040 "name": "A String", # Dataflow service generated name for this source.
3041 },
3042 ],
3043 "kind": "A String", # Type of transform this stage is executing.
3044 "name": "A String", # Dataflow service generated name for this stage.
3045 "outputSource": [ # Output sources for this stage.
3046 { # Description of an input or output of an execution stage.
3047 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3048 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3049 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07003050 "name": "A String", # Dataflow service generated name for this source.
3051 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003052 },
3053 ],
3054 "inputSource": [ # Input sources for this stage.
3055 { # Description of an input or output of an execution stage.
3056 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3057 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3058 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07003059 "name": "A String", # Dataflow service generated name for this source.
3060 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003061 },
3062 ],
3063 "componentTransform": [ # Transforms that comprise this execution stage.
3064 { # Description of a transform executed as part of an execution stage.
3065 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3066 "originalTransform": "A String", # User name for the original user transform with which this transform is
3067 # most closely associated.
3068 "name": "A String", # Dataflow service generated name for this source.
3069 },
3070 ],
3071 "id": "A String", # Dataflow service generated id for this stage.
3072 },
3073 ],
3074 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003075 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003076 { # Defines a particular step within a Cloud Dataflow job.
3077 #
3078 # A job consists of multiple steps, each of which performs some
3079 # specific operation as part of the overall job. Data is typically
3080 # passed from one step to another as part of the job.
3081 #
3082 # Here's an example of a sequence of steps which together implement a
3083 # Map-Reduce job:
3084 #
3085 # * Read a collection of data from some source, parsing the
3086 # collection's elements.
3087 #
3088 # * Validate the elements.
3089 #
3090 # * Apply a user-defined function to map each element to some value
3091 # and extract an element-specific key value.
3092 #
3093 # * Group elements with the same key into a single element with
3094 # that key, transforming a multiply-keyed collection into a
3095 # uniquely-keyed collection.
3096 #
3097 # * Write the elements out to some data sink.
3098 #
3099 # Note that the Cloud Dataflow service may be used to run many different
3100 # types of jobs, not just Map-Reduce.
3101 "kind": "A String", # The kind of step in the Cloud Dataflow job.
3102 "properties": { # Named properties associated with the step. Each kind of
3103 # predefined step has its own required set of properties.
3104 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003105 "a_key": "", # Properties of the object.
3106 },
Thomas Coffee2f245372017-03-27 10:39:26 -07003107 "name": "A String", # The name that identifies the step. This must be unique for each
3108 # step with respect to all other steps in the Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003109 },
3110 ],
Thomas Coffee2f245372017-03-27 10:39:26 -07003111 "currentState": "A String", # The current state of the job.
3112 #
3113 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3114 # specified.
3115 #
3116 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3117 # terminal state. After a job has reached a terminal state, no
3118 # further state updates may be made.
3119 #
3120 # This field may be mutated by the Cloud Dataflow service;
3121 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003122 "tempFiles": [ # A set of files the system should be aware of that are used
3123 # for temporary storage. These temporary files will be
3124 # removed on job completion.
3125 # No duplicates are allowed.
3126 # No file patterns are supported.
3127 #
3128 # The supported files are:
3129 #
3130 # Google Cloud Storage:
3131 #
3132 # storage.googleapis.com/{bucket}/{object}
3133 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003134 "A String",
3135 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003136 "type": "A String", # The type of Cloud Dataflow job.
3137 "id": "A String", # The unique ID of this job.
3138 #
3139 # This field is set by the Cloud Dataflow service when the Job is
3140 # created, and is immutable for the life of the job.
Thomas Coffee2f245372017-03-27 10:39:26 -07003141 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3142 # of the job it replaced.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003143 #
Thomas Coffee2f245372017-03-27 10:39:26 -07003144 # When sending a `CreateJobRequest`, you can update a job by specifying it
3145 # here. The job named here is stopped, and its intermediate state is
3146 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003147 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3148 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003149 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003150 "a_key": { # Contains information about how a particular
3151 # google.dataflow.v1beta3.Step will be executed.
3152 "stepName": [ # The steps associated with the execution stage.
3153 # Note that stages may have several steps, and that a given step
3154 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08003155 "A String",
3156 ],
3157 },
3158 },
3159 },
3160 }</pre>
3161</div>
3162
3163</body></html>