<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -070078 <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
79</p>
80<p class="firstline">Returns the debug Resource.</p>
81
82<p class="toc_element">
Nathaniel Manista4f877e52015-06-15 16:44:50 +000083 <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
84</p>
85<p class="firstline">Returns the messages Resource.</p>
86
87<p class="toc_element">
88 <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
89</p>
90<p class="firstline">Returns the workItems Resource.</p>
91
92<p class="toc_element">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080093 <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040094<p class="firstline">Creates a Cloud Dataflow job.</p>
Nathaniel Manista4f877e52015-06-15 16:44:50 +000095<p class="toc_element">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080096 <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -040097<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
Nathaniel Manista4f877e52015-06-15 16:44:50 +000098<p class="toc_element">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -080099 <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000100<p class="firstline">Request the job status.</p>
101<p class="toc_element">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800102 <code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400103<p class="firstline">List the jobs of a project.</p>
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000104<p class="toc_element">
105 <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
106<p class="firstline">Retrieves the next page of results.</p>
107<p class="toc_element">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800108 <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400109<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000110<h3>Method Details</h3>
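<p>A minimal usage sketch for calling these methods with the google-api-python-client library. The project ID, job name, bucket, location, the <code>JOB_TYPE_BATCH</code> value, and the <code>jobs</code> key in the list response are illustrative assumptions, not values taken from this reference.</p>
<pre>
# Minimal sketch, assuming Application Default Credentials are configured and
# that the placeholder project, bucket, and location values are replaced.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

job_body = {
    'name': 'example-job',     # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',  # assumed enum value for a batch job
    'environment': {
        'tempStoragePrefix': 'storage.googleapis.com/my-bucket/temp',
    },
}

# Create the job, then read back its current state with get().
created = dataflow.projects().jobs().create(
    projectId='my-project', body=job_body, location='us-central1').execute()
state = dataflow.projects().jobs().get(
    projectId='my-project', jobId=created['id']).execute()['currentState']

# Page through the project's jobs with list()/list_next().
request = dataflow.projects().jobs().list(projectId='my-project', pageSize=50)
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):  # 'jobs' key assumed from the List response
        print(job['id'], job.get('currentState'))
    request = dataflow.projects().jobs().list_next(request, response)
</pre>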
111<div class="method">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800112 <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400113 <pre>Creates a Cloud Dataflow job.
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000114
115Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400116 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000117 body: object, The request body. (required)
118 The object takes the form of:
119
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400120{ # Defines a job to be run by the Cloud Dataflow service.
121 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
122 # If this field is set, the service will ensure its uniqueness.
123 # The request to create a job will fail if the service has knowledge of a
124 # previously submitted job with the same client's ID and job name.
125 # The caller may use this field to ensure idempotence of job
126 # creation across retried attempts to create a job.
127 # By default, the field is empty and, in that case, the service ignores it.
128 "requestedState": "A String", # The job's requested state.
129 #
130 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
131 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
132 # also be used to directly set a job's requested state to
133 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
134 # job if it has not already reached a terminal state.
135 "name": "A String", # The user-specified Cloud Dataflow job name.
136 #
137 # Only one Job with a given name may exist in a project at any
138 # given time. If a caller attempts to create a Job with the same
139 # name as an already-existing Job, the attempt returns the
140 # existing Job.
141 #
142 # The name must match the regular expression
143 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
144 "currentStateTime": "A String", # The timestamp associated with the current state.
145 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
146 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
147 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
148 "labels": { # User-defined labels for this job.
149 #
150 # The labels map can contain no more than 64 entries. Entries of the labels
151 # map are UTF8 strings that comply with the following restrictions:
152 #
153 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
154 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
155 # * Both keys and values are additionally constrained to be <= 128 bytes in
156 # size.
      "a_key": "A String",
    },
    "location": "A String", # The location that contains this job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage. The system will append the suffix "/temp-{JOBNAME}" to
173 # this resource prefix, where {JOBNAME} is the value of the
174 # job_name field. The resulting bucket and object prefix is used
175 # as the prefix of the resources used to store temporary data
176 # needed during the job execution. NOTE: This will override the
177 # value in taskrunner_settings.
178 # The supported resource type is:
179 #
180 # Google Cloud Storage:
181 #
182 # storage.googleapis.com/{bucket}/{object}
183 # bucket.storage.googleapis.com/{object}
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
188 # related tables are stored.
189 #
190 # The supported resource type is:
191 #
192 # Google BigQuery:
193 # bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
203 },
204 "userAgent": { # A description of the process that generated the request.
205 "a_key": "", # Properties of the object.
206 },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
208 # unspecified, the service will attempt to choose a reasonable
209 # default. This should be in the form of the API service name,
210 # e.g. "compute.googleapis.com".
211 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
212 # specified in order for the job to have workers.
213 { # Describes one particular pool of Cloud Dataflow workers to be
214 # instantiated by the Cloud Dataflow service in order to perform the
215 # computations required by a job. Note that a workflow job may use
216 # multiple pools, in order to match the various computational
217 # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
221 # are supported.
222 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
223 # service will attempt to choose a reasonable default.
224 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
225 # the service will use the network "default".
226 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
227 # will attempt to choose a reasonable default.
228 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
229 # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
236 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
237 # `TEARDOWN_NEVER`.
238 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
239 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
240 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
241 # down.
242 #
243 # If the workers are not torn down by the service, they will
244 # continue to run and use Google Compute Engine VM resources in the
245 # user's project until they are explicitly terminated by the user.
246 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
247 # policy except for small, manually supervised test jobs.
248 #
249 # If unknown or unspecified, the service will attempt to choose a reasonable
250 # default.
251 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
252 # service will choose a number of threads (according to the number of cores
253 # on the selected machine type for batch, or 1 by convention for streaming).
254 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
255 # the form "regions/REGION/subnetworks/SUBNETWORK".
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
260 # execute the job. If zero or unspecified, the service will
261 # attempt to choose a reasonable default.
262 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
263 # using the standard Dataflow task runner. Users should ignore
264 # this field.
265 "workflowFileName": "A String", # The file to store the workflow in.
266 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
267 # will not be uploaded.
268 #
269 # The supported resource type is:
270 #
271 # Google Cloud Storage:
272 # storage.googleapis.com/{bucket}/{object}
273 # bucket.storage.googleapis.com/{object}
274 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
275 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
276 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
277 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
278 "vmId": "A String", # The ID string of the VM.
279 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
280 # taskrunner; e.g. "wheel".
281 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
282 # taskrunner; e.g. "root".
283 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
284 # access the Cloud Dataflow API.
285 "A String",
286 ],
287 "languageHint": "A String", # The suggested backend language.
288 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
289 # console.
290 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
291 "logDir": "A String", # The directory on the VM to store logs.
292 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
293 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
294 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
295 # "shuffle/v1beta1".
296 "workerId": "A String", # The ID of the worker running this pipeline.
297 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
298 #
299 # When workers access Google Cloud APIs, they logically do so via
300 # relative URLs. If this field is specified, it supplies the base
301 # URL to use for resolving these relative URLs. The normative
302 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
303 # Locators".
304 #
305 # If not specified, the default value is "http://www.googleapis.com/"
306 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
307 # "dataflow/v1b3/projects".
308 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
309 # storage.
310 #
311 # The supported resource type is:
312 #
313 # Google Cloud Storage:
314 #
315 # storage.googleapis.com/{bucket}/{object}
316 # bucket.storage.googleapis.com/{object}
317 },
318 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
319 "harnessCommand": "A String", # The command to launch the worker harness.
320 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
321 # temporary storage.
322 #
323 # The supported resource type is:
324 #
325 # Google Cloud Storage:
326 # storage.googleapis.com/{bucket}/{object}
327 # bucket.storage.googleapis.com/{object}
328 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
329 #
330 # When workers access Google Cloud APIs, they logically do so via
331 # relative URLs. If this field is specified, it supplies the base
332 # URL to use for resolving these relative URLs. The normative
333 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
334 # Locators".
335 #
336 # If not specified, the default value is "http://www.googleapis.com/"
337 },
338 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
339 # select a default set of packages which are useful to worker
340 # harnesses written in a particular language.
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
343 # steps of the Cloud Dataflow job that will be assigned to its worker
344 # pool.
345 #
346 # This is the mechanism by which the Cloud Dataflow SDK causes code to
347 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
348 # might use this to install jars containing the user's code and all of the
349 # various dependencies (libraries, data files, etc.) required in order
350 # for that code to run.
351 "location": "A String", # The resource to read the package from. The supported resource type is:
352 #
353 # Google Cloud Storage:
354 #
355 # storage.googleapis.com/{bucket}
356 # bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "algorithm": "A String", # The algorithm to use for autoscaling.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          },
364 "dataDisks": [ # Data disks that are used by a VM in this workflow.
365 { # Describes the data disk used by a workflow job.
366 "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
368 # attempt to choose a reasonable default.
369 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
370 # must be a disk type appropriate to the project and zone in which
371 # the workers will run. If unknown or unspecified, the service
372 # will attempt to choose a reasonable default.
373 #
374 # For example, the standard persistent disk type is a resource name
375 # typically ending in "pd-standard". If SSD persistent disks are
376 # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
378 # not by the Cloud Dataflow API; consult the Google Compute Engine
379 # documentation for more information about determining the set of
380 # available disk types for a particular project and zone.
381 #
382 # Google Compute Engine Disk types are local to a particular
383 # project in a particular zone, and so the resource name will
384 # typically look something like this:
385 #
386 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
390 # attempt to choose a reasonable default.
391 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
392 # harness, residing in Google Container Registry.
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
402 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
403 { # Description of the type, names/ids, and input/outputs for a transform.
404 "kind": "A String", # Type of transform.
405 "name": "A String", # User provided name for this transform instance.
406 "inputCollectionName": [ # User names for all collection inputs to this transform.
407 "A String",
408 ],
409 "displayData": [ # Transform-specific display data.
410 { # Data provided with a pipeline or transform to provide descriptive info.
411 "key": "A String", # The key identifying the display data.
412 # This is intended to be used as a label for the display data
413 # when viewed in a dax monitoring system.
414 "shortStrValue": "A String", # A possible additional shorter value to display.
415 # For example a java_class_name_value of com.mypackage.MyDoFn
416 # will be stored with MyDoFn as the short_str_value and
417 # com.mypackage.MyDoFn as the java_class_name value.
418 # short_str_value can be displayed and java_class_name_value
419 # will be displayed as a tooltip.
420 "timestampValue": "A String", # Contains value if the data is of timestamp type.
421 "url": "A String", # An optional full URL.
422 "floatValue": 3.14, # Contains value if the data is of float type.
423 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
424 # language namespace (i.e. python module) which defines the display data.
425 # This allows a dax monitoring system to specially handle the data
426 # and perform custom rendering.
427 "javaClassValue": "A String", # Contains value if the data is of java class type.
428 "label": "A String", # An optional label to display in a dax UI for the element.
429 "boolValue": True or False, # Contains value if the data is of a boolean type.
430 "strValue": "A String", # Contains value if the data is of string type.
431 "durationValue": "A String", # Contains value if the data is of duration type.
432 "int64Value": "A String", # Contains value if the data is of int64 type.
433 },
434 ],
435 "outputCollectionName": [ # User names for all collection outputs to this transform.
436 "A String",
437 ],
438 "id": "A String", # SDK generated id of this transform instance.
439 },
440 ],
441 "displayData": [ # Pipeline level display data.
442 { # Data provided with a pipeline or transform to provide descriptive info.
443 "key": "A String", # The key identifying the display data.
444 # This is intended to be used as a label for the display data
445 # when viewed in a dax monitoring system.
446 "shortStrValue": "A String", # A possible additional shorter value to display.
447 # For example a java_class_name_value of com.mypackage.MyDoFn
448 # will be stored with MyDoFn as the short_str_value and
449 # com.mypackage.MyDoFn as the java_class_name value.
450 # short_str_value can be displayed and java_class_name_value
451 # will be displayed as a tooltip.
452 "timestampValue": "A String", # Contains value if the data is of timestamp type.
453 "url": "A String", # An optional full URL.
454 "floatValue": 3.14, # Contains value if the data is of float type.
455 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
456 # language namespace (i.e. python module) which defines the display data.
457 # This allows a dax monitoring system to specially handle the data
458 # and perform custom rendering.
459 "javaClassValue": "A String", # Contains value if the data is of java class type.
460 "label": "A String", # An optional label to display in a dax UI for the element.
461 "boolValue": True or False, # Contains value if the data is of a boolean type.
462 "strValue": "A String", # Contains value if the data is of string type.
463 "durationValue": "A String", # Contains value if the data is of duration type.
464 "int64Value": "A String", # Contains value if the data is of int64 type.
465 },
466 ],
467 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
468 { # Description of the composing transforms, names/ids, and input/outputs of a
469 # stage of execution. Some composing transforms and sources may have been
470 # generated by the Dataflow service during execution planning.
471 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
472 { # Description of an interstitial value between transforms in an execution
473 # stage.
474 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
475 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
476 # source is most closely associated.
477 "name": "A String", # Dataflow service generated name for this source.
478 },
479 ],
480 "kind": "A String", # Type of tranform this stage is executing.
481 "name": "A String", # Dataflow service generated name for this stage.
482 "outputSource": [ # Output sources for this stage.
483 { # Description of an input or output of an execution stage.
484 "userName": "A String", # Human-readable name for this source; may be user or system generated.
485 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
486 # source is most closely associated.
487 "name": "A String", # Dataflow service generated name for this source.
488 "sizeBytes": "A String", # Size of the source, if measurable.
489 },
490 ],
491 "inputSource": [ # Input sources for this stage.
492 { # Description of an input or output of an execution stage.
493 "userName": "A String", # Human-readable name for this source; may be user or system generated.
494 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
495 # source is most closely associated.
496 "name": "A String", # Dataflow service generated name for this source.
497 "sizeBytes": "A String", # Size of the source, if measurable.
498 },
499 ],
500 "componentTransform": [ # Transforms that comprise this execution stage.
501 { # Description of a transform executed as part of an execution stage.
502 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
503 "originalTransform": "A String", # User name for the original user transform with which this transform is
504 # most closely associated.
505 "name": "A String", # Dataflow service generated name for this source.
506 },
507 ],
508 "id": "A String", # Dataflow service generated id for this stage.
509 },
510 ],
511 },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
514 #
515 # A job consists of multiple steps, each of which performs some
516 # specific operation as part of the overall job. Data is typically
517 # passed from one step to another as part of the job.
518 #
519 # Here's an example of a sequence of steps which together implement a
520 # Map-Reduce job:
521 #
522 # * Read a collection of data from some source, parsing the
523 # collection's elements.
524 #
525 # * Validate the elements.
526 #
527 # * Apply a user-defined function to map each element to some value
528 # and extract an element-specific key value.
529 #
530 # * Group elements with the same key into a single element with
531 # that key, transforming a multiply-keyed collection into a
532 # uniquely-keyed collection.
533 #
534 # * Write the elements out to some data sink.
535 #
536 # Note that the Cloud Dataflow service may be used to run many different
537 # types of jobs, not just Map-Reduce.
538 "kind": "A String", # The kind of step in the Cloud Dataflow job.
539 "properties": { # Named properties associated with the step. Each kind of
540 # predefined step has its own required set of properties.
541 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "currentState": "A String", # The current state of the job.
549 #
550 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
551 # specified.
552 #
553 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
554 # terminal state. After a job has reached a terminal state, no
555 # further state updates may be made.
556 #
557 # This field may be mutated by the Cloud Dataflow service;
558 # callers cannot mutate it.
559 "tempFiles": [ # A set of files the system should be aware of that are used
560 # for temporary storage. These temporary files will be
561 # removed on job completion.
562 # No duplicates are allowed.
563 # No file patterns are supported.
564 #
565 # The supported files are:
566 #
567 # Google Cloud Storage:
568 #
569 # storage.googleapis.com/{bucket}/{object}
570 # bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
574 "id": "A String", # The unique ID of this job.
575 #
576 # This field is set by the Cloud Dataflow service when the Job is
577 # created, and is immutable for the life of the job.
578 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
579 # of the job it replaced.
580 #
581 # When sending a `CreateJobRequest`, you can update a job by specifying it
582 # here. The job named here is stopped, and its intermediate state is
583 # transferred to this job.
584 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
585 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700586 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400587 "a_key": { # Contains information about how a particular
588 # google.dataflow.v1beta3.Step will be executed.
589 "stepName": [ # The steps associated with the execution stage.
590 # Note that stages may have several steps, and that a given step
591 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000592 "A String",
593 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000594 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000595 },
596 },
Takashi Matsuo06694102015-09-11 13:55:40 -0700597 }
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000598
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400599 location: string, The location that contains this job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700600 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400601 Allowed values
602 1 - v1 error format
603 2 - v2 error format
604 replaceJobId: string, Deprecated. This field is now in the Job message.
605 view: string, The level of information requested in response.
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000606
607Returns:
608 An object of the form:
609
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400610 { # Defines a job to be run by the Cloud Dataflow service.
611 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
612 # If this field is set, the service will ensure its uniqueness.
613 # The request to create a job will fail if the service has knowledge of a
614 # previously submitted job with the same client's ID and job name.
615 # The caller may use this field to ensure idempotence of job
616 # creation across retried attempts to create a job.
617 # By default, the field is empty and, in that case, the service ignores it.
618 "requestedState": "A String", # The job's requested state.
619 #
620 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
621 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
622 # also be used to directly set a job's requested state to
623 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
624 # job if it has not already reached a terminal state.
625 "name": "A String", # The user-specified Cloud Dataflow job name.
626 #
627 # Only one Job with a given name may exist in a project at any
628 # given time. If a caller attempts to create a Job with the same
629 # name as an already-existing Job, the attempt returns the
630 # existing Job.
631 #
632 # The name must match the regular expression
633 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
634 "currentStateTime": "A String", # The timestamp associated with the current state.
635 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
636 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
637 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
638 "labels": { # User-defined labels for this job.
639 #
640 # The labels map can contain no more than 64 entries. Entries of the labels
641 # map are UTF8 strings that comply with the following restrictions:
642 #
643 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
644 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
645 # * Both keys and values are additionally constrained to be <= 128 bytes in
646 # size.
        "a_key": "A String",
      },
      "location": "A String", # The location that contains this job.
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
663 # this resource prefix, where {JOBNAME} is the value of the
664 # job_name field. The resulting bucket and object prefix is used
665 # as the prefix of the resources used to store temporary data
666 # needed during the job execution. NOTE: This will override the
667 # value in taskrunner_settings.
668 # The supported resource type is:
669 #
670 # Google Cloud Storage:
671 #
672 # storage.googleapis.com/{bucket}/{object}
673 # bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
678 # related tables are stored.
679 #
680 # The supported resource type is:
681 #
682 # Google BigQuery:
683 # bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language agnostic and platform
            # independent way.
          "a_key": "", # Properties of the object.
693 },
694 "userAgent": { # A description of the process that generated the request.
695 "a_key": "", # Properties of the object.
696 },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
698 # unspecified, the service will attempt to choose a reasonable
699 # default. This should be in the form of the API service name,
700 # e.g. "compute.googleapis.com".
701 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
702 # specified in order for the job to have workers.
703 { # Describes one particular pool of Cloud Dataflow workers to be
704 # instantiated by the Cloud Dataflow service in order to perform the
705 # computations required by a job. Note that a workflow job may use
706 # multiple pools, in order to match the various computational
707 # requirements of the various stages of the job.
            "diskSourceImage": "A String", # Fully qualified source image for disks.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
711 # are supported.
712 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
713 # service will attempt to choose a reasonable default.
714 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
715 # the service will use the network "default".
716 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
717 # will attempt to choose a reasonable default.
718 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
719 # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
726 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
727 # `TEARDOWN_NEVER`.
728 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
729 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
730 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
731 # down.
732 #
733 # If the workers are not torn down by the service, they will
734 # continue to run and use Google Compute Engine VM resources in the
735 # user's project until they are explicitly terminated by the user.
736 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
737 # policy except for small, manually supervised test jobs.
738 #
739 # If unknown or unspecified, the service will attempt to choose a reasonable
740 # default.
741 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
742 # service will choose a number of threads (according to the number of cores
743 # on the selected machine type for batch, or 1 by convention for streaming).
744 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
745 # the form "regions/REGION/subnetworks/SUBNETWORK".
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
750 # execute the job. If zero or unspecified, the service will
751 # attempt to choose a reasonable default.
752 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
753 # using the standard Dataflow task runner. Users should ignore
754 # this field.
755 "workflowFileName": "A String", # The file to store the workflow in.
756 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
757 # will not be uploaded.
758 #
759 # The supported resource type is:
760 #
761 # Google Cloud Storage:
762 # storage.googleapis.com/{bucket}/{object}
763 # bucket.storage.googleapis.com/{object}
764 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
765 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
766 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
767 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
768 "vmId": "A String", # The ID string of the VM.
769 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
770 # taskrunner; e.g. "wheel".
771 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
772 # taskrunner; e.g. "root".
773 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
774 # access the Cloud Dataflow API.
775 "A String",
776 ],
777 "languageHint": "A String", # The suggested backend language.
778 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
779 # console.
780 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
781 "logDir": "A String", # The directory on the VM to store logs.
782 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
783 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
784 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
785 # "shuffle/v1beta1".
786 "workerId": "A String", # The ID of the worker running this pipeline.
787 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
788 #
789 # When workers access Google Cloud APIs, they logically do so via
790 # relative URLs. If this field is specified, it supplies the base
791 # URL to use for resolving these relative URLs. The normative
792 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
793 # Locators".
794 #
795 # If not specified, the default value is "http://www.googleapis.com/"
796 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
797 # "dataflow/v1b3/projects".
798 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
799 # storage.
800 #
801 # The supported resource type is:
802 #
803 # Google Cloud Storage:
804 #
805 # storage.googleapis.com/{bucket}/{object}
806 # bucket.storage.googleapis.com/{object}
807 },
808 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
809 "harnessCommand": "A String", # The command to launch the worker harness.
810 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
811 # temporary storage.
812 #
813 # The supported resource type is:
814 #
815 # Google Cloud Storage:
816 # storage.googleapis.com/{bucket}/{object}
817 # bucket.storage.googleapis.com/{object}
818 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
819 #
820 # When workers access Google Cloud APIs, they logically do so via
821 # relative URLs. If this field is specified, it supplies the base
822 # URL to use for resolving these relative URLs. The normative
823 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
824 # Locators".
825 #
826 # If not specified, the default value is "http://www.googleapis.com/"
827 },
828 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
829 # select a default set of packages which are useful to worker
830 # harnesses written in a particular language.
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
833 # steps of the Cloud Dataflow job that will be assigned to its worker
834 # pool.
835 #
836 # This is the mechanism by which the Cloud Dataflow SDK causes code to
837 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
838 # might use this to install jars containing the user's code and all of the
839 # various dependencies (libraries, data files, etc.) required in order
840 # for that code to run.
841 "location": "A String", # The resource to read the package from. The supported resource type is:
842 #
843 # Google Cloud Storage:
844 #
845 # storage.googleapis.com/{bucket}
846 # bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "algorithm": "A String", # The algorithm to use for autoscaling.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            },
854 "dataDisks": [ # Data disks that are used by a VM in this workflow.
855 { # Describes the data disk used by a workflow job.
856 "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
858 # attempt to choose a reasonable default.
859 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
860 # must be a disk type appropriate to the project and zone in which
861 # the workers will run. If unknown or unspecified, the service
862 # will attempt to choose a reasonable default.
863 #
864 # For example, the standard persistent disk type is a resource name
865 # typically ending in "pd-standard". If SSD persistent disks are
866 # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
868 # not by the Cloud Dataflow API; consult the Google Compute Engine
869 # documentation for more information about determining the set of
870 # available disk types for a particular project and zone.
871 #
872 # Google Compute Engine Disk types are local to a particular
873 # project in a particular zone, and so the resource name will
874 # typically look something like this:
875 #
876 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
880 # attempt to choose a reasonable default.
881 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
882 # harness, residing in Google Container Registry.
          },
        ],
      },
      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by Cloud Dataflow service. Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          # form. This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics.
892 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
893 { # Description of the type, names/ids, and input/outputs for a transform.
894 "kind": "A String", # Type of transform.
895 "name": "A String", # User provided name for this transform instance.
896 "inputCollectionName": [ # User names for all collection inputs to this transform.
897 "A String",
898 ],
899 "displayData": [ # Transform-specific display data.
900 { # Data provided with a pipeline or transform to provide descriptive info.
901 "key": "A String", # The key identifying the display data.
902 # This is intended to be used as a label for the display data
903 # when viewed in a dax monitoring system.
904 "shortStrValue": "A String", # A possible additional shorter value to display.
905 # For example a java_class_name_value of com.mypackage.MyDoFn
906 # will be stored with MyDoFn as the short_str_value and
907 # com.mypackage.MyDoFn as the java_class_name value.
908 # short_str_value can be displayed and java_class_name_value
909 # will be displayed as a tooltip.
910 "timestampValue": "A String", # Contains value if the data is of timestamp type.
911 "url": "A String", # An optional full URL.
912 "floatValue": 3.14, # Contains value if the data is of float type.
913 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
914 # language namespace (i.e. python module) which defines the display data.
915 # This allows a dax monitoring system to specially handle the data
916 # and perform custom rendering.
917 "javaClassValue": "A String", # Contains value if the data is of java class type.
918 "label": "A String", # An optional label to display in a dax UI for the element.
919 "boolValue": True or False, # Contains value if the data is of a boolean type.
920 "strValue": "A String", # Contains value if the data is of string type.
921 "durationValue": "A String", # Contains value if the data is of duration type.
922 "int64Value": "A String", # Contains value if the data is of int64 type.
923 },
924 ],
925 "outputCollectionName": [ # User names for all collection outputs to this transform.
926 "A String",
927 ],
928 "id": "A String", # SDK generated id of this transform instance.
929 },
930 ],
931 "displayData": [ # Pipeline level display data.
932 { # Data provided with a pipeline or transform to provide descriptive info.
933 "key": "A String", # The key identifying the display data.
934 # This is intended to be used as a label for the display data
935 # when viewed in a dax monitoring system.
936 "shortStrValue": "A String", # A possible additional shorter value to display.
937 # For example a java_class_name_value of com.mypackage.MyDoFn
938 # will be stored with MyDoFn as the short_str_value and
939 # com.mypackage.MyDoFn as the java_class_name value.
940 # short_str_value can be displayed and java_class_name_value
941 # will be displayed as a tooltip.
942 "timestampValue": "A String", # Contains value if the data is of timestamp type.
943 "url": "A String", # An optional full URL.
944 "floatValue": 3.14, # Contains value if the data is of float type.
945 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
946 # language namespace (i.e. python module) which defines the display data.
947 # This allows a dax monitoring system to specially handle the data
948 # and perform custom rendering.
949 "javaClassValue": "A String", # Contains value if the data is of java class type.
950 "label": "A String", # An optional label to display in a dax UI for the element.
951 "boolValue": True or False, # Contains value if the data is of a boolean type.
952 "strValue": "A String", # Contains value if the data is of string type.
953 "durationValue": "A String", # Contains value if the data is of duration type.
954 "int64Value": "A String", # Contains value if the data is of int64 type.
955 },
956 ],
957 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
958 { # Description of the composing transforms, names/ids, and input/outputs of a
959 # stage of execution. Some composing transforms and sources may have been
960 # generated by the Dataflow service during execution planning.
961 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
962 { # Description of an interstitial value between transforms in an execution
963 # stage.
964 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
965 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
966 # source is most closely associated.
967 "name": "A String", # Dataflow service generated name for this source.
968 },
969 ],
970 "kind": "A String", # Type of tranform this stage is executing.
971 "name": "A String", # Dataflow service generated name for this stage.
972 "outputSource": [ # Output sources for this stage.
973 { # Description of an input or output of an execution stage.
974 "userName": "A String", # Human-readable name for this source; may be user or system generated.
975 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
976 # source is most closely associated.
977 "name": "A String", # Dataflow service generated name for this source.
978 "sizeBytes": "A String", # Size of the source, if measurable.
979 },
980 ],
981 "inputSource": [ # Input sources for this stage.
982 { # Description of an input or output of an execution stage.
983 "userName": "A String", # Human-readable name for this source; may be user or system generated.
984 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
985 # source is most closely associated.
986 "name": "A String", # Dataflow service generated name for this source.
987 "sizeBytes": "A String", # Size of the source, if measurable.
988 },
989 ],
990 "componentTransform": [ # Transforms that comprise this execution stage.
991 { # Description of a transform executed as part of an execution stage.
992 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
993 "originalTransform": "A String", # User name for the original user transform with which this transform is
994 # most closely associated.
995 "name": "A String", # Dataflow service generated name for this source.
996 },
997 ],
998 "id": "A String", # Dataflow service generated id for this stage.
999 },
1000 ],
1001 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001002 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001003 { # Defines a particular step within a Cloud Dataflow job.
1004 #
1005 # A job consists of multiple steps, each of which performs some
1006 # specific operation as part of the overall job. Data is typically
1007 # passed from one step to another as part of the job.
1008 #
1009 # Here's an example of a sequence of steps which together implement a
1010 # Map-Reduce job:
1011 #
1012 # * Read a collection of data from some source, parsing the
1013 # collection's elements.
1014 #
1015 # * Validate the elements.
1016 #
1017 # * Apply a user-defined function to map each element to some value
1018 # and extract an element-specific key value.
1019 #
1020 # * Group elements with the same key into a single element with
1021 # that key, transforming a multiply-keyed collection into a
1022 # uniquely-keyed collection.
1023 #
1024 # * Write the elements out to some data sink.
1025 #
1026 # Note that the Cloud Dataflow service may be used to run many different
1027 # types of jobs, not just Map-Reduce.
1028 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1029 "properties": { # Named properties associated with the step. Each kind of
1030 # predefined step has its own required set of properties.
1031 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001032 "a_key": "", # Properties of the object.
1033 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001034 "name": "A String", # The name that identifies the step. This must be unique for each
1035 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001036 },
1037 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001038 "currentState": "A String", # The current state of the job.
1039 #
1040 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1041 # specified.
1042 #
1043 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1044 # terminal state. After a job has reached a terminal state, no
1045 # further state updates may be made.
1046 #
1047 # This field may be mutated by the Cloud Dataflow service;
1048 # callers cannot mutate it.
1049 "tempFiles": [ # A set of files the system should be aware of that are used
1050 # for temporary storage. These temporary files will be
1051 # removed on job completion.
1052 # No duplicates are allowed.
1053 # No file patterns are supported.
1054 #
1055 # The supported files are:
1056 #
1057 # Google Cloud Storage:
1058 #
1059 # storage.googleapis.com/{bucket}/{object}
1060 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08001061 "A String",
1062 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001063 "type": "A String", # The type of Cloud Dataflow job.
1064 "id": "A String", # The unique ID of this job.
1065 #
1066 # This field is set by the Cloud Dataflow service when the Job is
1067 # created, and is immutable for the life of the job.
1068 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1069 # of the job it replaced.
1070 #
1071 # When sending a `CreateJobRequest`, you can update a job by specifying it
1072 # here. The job named here is stopped, and its intermediate state is
1073 # transferred to this job.
1074 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1075 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001076 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001077 "a_key": { # Contains information about how a particular
1078 # google.dataflow.v1beta3.Step will be executed.
1079 "stepName": [ # The steps associated with the execution stage.
1080 # Note that stages may have several steps, and that a given step
1081 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001082 "A String",
1083 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001084 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001085 },
1086 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001087 }</pre>
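<p>Illustrative sketch (not part of the generated reference): the response documented above is returned to Python callers as a plain dict, so it can be inspected directly. The snippet assumes a dict named job with the field names listed in the schema ("id", "type", "currentState", "steps"); the values shown are placeholders.</p>
<pre>
# Sketch: print a short summary of a Job response dict shaped like the schema above.
def summarize_job(job):
    print("Job %s (%s) is in state %s" % (
        job.get("id", "unknown"),
        job.get("type", "unknown"),
        job.get("currentState", "unknown")))
    for step in job.get("steps", []):
        # Each top-level step carries a kind and a name unique within the job.
        print("  step %s: %s" % (step.get("name"), step.get("kind")))

# Placeholder data shaped like the documented response:
summarize_job({
    "id": "2017-01-01_00_00_00-1234",
    "type": "JOB_TYPE_BATCH",
    "currentState": "JOB_STATE_RUNNING",
    "steps": [{"kind": "ParallelRead", "name": "s1"}],
})
</pre>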
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001088</div>
1089
1090<div class="method">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001091 <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001092 <pre>Gets the state of the specified Cloud Dataflow job.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001093
1094Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001095 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1096 jobId: string, The job ID. (required)
1097 location: string, The location that contains this job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001098 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001099 Allowed values
1100 1 - v1 error format
1101 2 - v2 error format
1102 view: string, The level of information requested in response.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001103
1104Returns:
1105 An object of the form:
1106
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001107 { # Defines a job to be run by the Cloud Dataflow service.
1108 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1109 # If this field is set, the service will ensure its uniqueness.
1110 # The request to create a job will fail if the service has knowledge of a
1111 # previously submitted job with the same client's ID and job name.
1112 # The caller may use this field to ensure idempotence of job
1113 # creation across retried attempts to create a job.
1114 # By default, the field is empty and, in that case, the service ignores it.
1115 "requestedState": "A String", # The job's requested state.
1116 #
1117 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1118 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1119 # also be used to directly set a job's requested state to
1120 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1121 # job if it has not already reached a terminal state.
1122 "name": "A String", # The user-specified Cloud Dataflow job name.
1123 #
1124 # Only one Job with a given name may exist in a project at any
1125 # given time. If a caller attempts to create a Job with the same
1126 # name as an already-existing Job, the attempt returns the
1127 # existing Job.
1128 #
1129 # The name must match the regular expression
1130 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1131 "currentStateTime": "A String", # The timestamp associated with the current state.
1132 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1133 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1134 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1135 "labels": { # User-defined labels for this job.
1136 #
1137 # The labels map can contain no more than 64 entries. Entries of the labels
1138 # map are UTF8 strings that comply with the following restrictions:
1139 #
1140 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1141 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1142 # * Both keys and values are additionally constrained to be <= 128 bytes in
1143 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001144 "a_key": "A String",
1145 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001146 "location": "A String", # The location that contains this job.
1147 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1148 # Cloud Dataflow service.
1149 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1150 # corresponding name prefixes of the new job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001151 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001152 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001153 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1154 "version": { # A structure describing which components and their versions of the service
1155 # are required in order to run the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001156 "a_key": "", # Properties of the object.
1157 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001158 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1159 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1160 # this resource prefix, where {JOBNAME} is the value of the
1161 # job_name field. The resulting bucket and object prefix is used
1162 # as the prefix of the resources used to store temporary data
1163 # needed during the job execution. NOTE: This will override the
1164 # value in taskrunner_settings.
1165 # The supported resource type is:
1166 #
1167 # Google Cloud Storage:
1168 #
1169 # storage.googleapis.com/{bucket}/{object}
1170 # bucket.storage.googleapis.com/{object}
Takashi Matsuo06694102015-09-11 13:55:40 -07001171 "internalExperiments": { # Experimental settings.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001172 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001173 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001174 "dataset": "A String", # The dataset for the current project where various workflow
1175 # related tables are stored.
1176 #
1177 # The supported resource type is:
1178 #
1179 # Google BigQuery:
1180 # bigquery.googleapis.com/{dataset}
Takashi Matsuo06694102015-09-11 13:55:40 -07001181 "experiments": [ # The list of experiments to enable.
1182 "A String",
1183 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -07001184 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001185 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1186 # options are passed through the service and are used to recreate the
1187 # SDK pipeline options on the worker in a language agnostic and platform
1188 # independent way.
Takashi Matsuo06694102015-09-11 13:55:40 -07001189 "a_key": "", # Properties of the object.
1190 },
1191 "userAgent": { # A description of the process that generated the request.
1192 "a_key": "", # Properties of the object.
1193 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001194 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1195 # unspecified, the service will attempt to choose a reasonable
1196 # default. This should be in the form of the API service name,
1197 # e.g. "compute.googleapis.com".
1198 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1199 # specified in order for the job to have workers.
1200 { # Describes one particular pool of Cloud Dataflow workers to be
1201 # instantiated by the Cloud Dataflow service in order to perform the
1202 # computations required by a job. Note that a workflow job may use
1203 # multiple pools, in order to match the various computational
1204 # requirements of the various stages of the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001205 "diskSourceImage": "A String", # Fully qualified source image for disks.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001206 "ipConfiguration": "A String", # Configuration for VM IPs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001207 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1208 # are supported.
1209 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1210 # service will attempt to choose a reasonable default.
1211 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1212 # the service will use the network "default".
1213 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1214 # will attempt to choose a reasonable default.
1215 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1216 # attempt to choose a reasonable default.
Takashi Matsuo06694102015-09-11 13:55:40 -07001217 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1218 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001219 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001220 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1221 # Compute Engine API.
1222 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1223 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1224 # `TEARDOWN_NEVER`.
1225 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1226 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1227 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1228 # down.
1229 #
1230 # If the workers are not torn down by the service, they will
1231 # continue to run and use Google Compute Engine VM resources in the
1232 # user's project until they are explicitly terminated by the user.
1233 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1234 # policy except for small, manually supervised test jobs.
1235 #
1236 # If unknown or unspecified, the service will attempt to choose a reasonable
1237 # default.
1238 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1239 # service will choose a number of threads (according to the number of cores
1240 # on the selected machine type for batch, or 1 by convention for streaming).
1241 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1242 # the form "regions/REGION/subnetworks/SUBNETWORK".
Takashi Matsuo06694102015-09-11 13:55:40 -07001243 "poolArgs": { # Extra arguments for this worker pool.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001244 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001245 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001246 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1247 # execute the job. If zero or unspecified, the service will
1248 # attempt to choose a reasonable default.
1249 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1250 # using the standard Dataflow task runner. Users should ignore
1251 # this field.
1252 "workflowFileName": "A String", # The file to store the workflow in.
1253 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1254 # will not be uploaded.
1255 #
1256 # The supported resource type is:
1257 #
1258 # Google Cloud Storage:
1259 # storage.googleapis.com/{bucket}/{object}
1260 # bucket.storage.googleapis.com/{object}
1261 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1262 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1263 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1264 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1265 "vmId": "A String", # The ID string of the VM.
1266 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1267 # taskrunner; e.g. "wheel".
1268 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1269 # taskrunner; e.g. "root".
1270 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1271 # access the Cloud Dataflow API.
1272 "A String",
1273 ],
1274 "languageHint": "A String", # The suggested backend language.
1275 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1276 # console.
1277 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1278 "logDir": "A String", # The directory on the VM to store logs.
1279 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1280 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1281 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1282 # "shuffle/v1beta1".
1283 "workerId": "A String", # The ID of the worker running this pipeline.
1284 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1285 #
1286 # When workers access Google Cloud APIs, they logically do so via
1287 # relative URLs. If this field is specified, it supplies the base
1288 # URL to use for resolving these relative URLs. The normative
1289 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1290 # Locators".
1291 #
1292 # If not specified, the default value is "http://www.googleapis.com/"
1293 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1294 # "dataflow/v1b3/projects".
1295 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1296 # storage.
1297 #
1298 # The supported resource type is:
1299 #
1300 # Google Cloud Storage:
1301 #
1302 # storage.googleapis.com/{bucket}/{object}
1303 # bucket.storage.googleapis.com/{object}
1304 },
1305 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1306 "harnessCommand": "A String", # The command to launch the worker harness.
1307 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1308 # temporary storage.
1309 #
1310 # The supported resource type is:
1311 #
1312 # Google Cloud Storage:
1313 # storage.googleapis.com/{bucket}/{object}
1314 # bucket.storage.googleapis.com/{object}
1315 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1316 #
1317 # When workers access Google Cloud APIs, they logically do so via
1318 # relative URLs. If this field is specified, it supplies the base
1319 # URL to use for resolving these relative URLs. The normative
1320 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1321 # Locators".
1322 #
1323 # If not specified, the default value is "http://www.googleapis.com/"
1324 },
1325 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1326 # select a default set of packages which are useful to worker
1327 # harnesses written in a particular language.
Takashi Matsuo06694102015-09-11 13:55:40 -07001328 "packages": [ # Packages to be installed on workers.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001329 { # The packages that must be installed in order for a worker to run the
1330 # steps of the Cloud Dataflow job that will be assigned to its worker
1331 # pool.
1332 #
1333 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1334 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1335 # might use this to install jars containing the user's code and all of the
1336 # various dependencies (libraries, data files, etc.) required in order
1337 # for that code to run.
1338 "location": "A String", # The resource to read the package from. The supported resource type is:
1339 #
1340 # Google Cloud Storage:
1341 #
1342 # storage.googleapis.com/{bucket}
1343 # bucket.storage.googleapis.com/
Takashi Matsuo06694102015-09-11 13:55:40 -07001344 "name": "A String", # The name of the package.
Takashi Matsuo06694102015-09-11 13:55:40 -07001345 },
1346 ],
1347 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
Takashi Matsuo06694102015-09-11 13:55:40 -07001348 "algorithm": "A String", # The algorithm to use for autoscaling.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001349 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
Takashi Matsuo06694102015-09-11 13:55:40 -07001350 },
1351 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1352 { # Describes the data disk used by a workflow job.
1353 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001354 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1355 # attempt to choose a reasonable default.
1356 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1357 # must be a disk type appropriate to the project and zone in which
1358 # the workers will run. If unknown or unspecified, the service
1359 # will attempt to choose a reasonable default.
1360 #
1361 # For example, the standard persistent disk type is a resource name
1362 # typically ending in "pd-standard". If SSD persistent disks are
1363 # available, the resource name typically ends with "pd-ssd". The
1364 # actual valid values are defined by the Google Compute Engine API,
1365 # not by the Cloud Dataflow API; consult the Google Compute Engine
1366 # documentation for more information about determining the set of
1367 # available disk types for a particular project and zone.
1368 #
1369 # Google Compute Engine Disk types are local to a particular
1370 # project in a particular zone, and so the resource name will
1371 # typically look something like this:
1372 #
1373 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Takashi Matsuo06694102015-09-11 13:55:40 -07001374 },
1375 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001376 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1377 # attempt to choose a reasonable default.
1378 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1379 # harness, residing in Google Container Registry.
Takashi Matsuo06694102015-09-11 13:55:40 -07001380 },
1381 ],
1382 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001383 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1384 # A description of the user pipeline and stages through which it is executed.
1385 # Created by Cloud Dataflow service. Only retrieved with
1386 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1387 # form. This data is provided by the Dataflow service for ease of visualizing
1388 # the pipeline and interpreting Dataflow-provided metrics.
1389 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1390 { # Description of the type, names/ids, and input/outputs for a transform.
1391 "kind": "A String", # Type of transform.
1392 "name": "A String", # User provided name for this transform instance.
1393 "inputCollectionName": [ # User names for all collection inputs to this transform.
1394 "A String",
1395 ],
1396 "displayData": [ # Transform-specific display data.
1397 { # Data provided with a pipeline or transform to provide descriptive info.
1398 "key": "A String", # The key identifying the display data.
1399 # This is intended to be used as a label for the display data
1400 # when viewed in a dax monitoring system.
1401 "shortStrValue": "A String", # A possible additional shorter value to display.
1402 # For example a java_class_name_value of com.mypackage.MyDoFn
1403 # will be stored with MyDoFn as the short_str_value and
1404 # com.mypackage.MyDoFn as the java_class_name value.
1405 # short_str_value can be displayed and java_class_name_value
1406 # will be displayed as a tooltip.
1407 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1408 "url": "A String", # An optional full URL.
1409 "floatValue": 3.14, # Contains value if the data is of float type.
1410 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1411 # language namespace (i.e. python module) which defines the display data.
1412 # This allows a dax monitoring system to specially handle the data
1413 # and perform custom rendering.
1414 "javaClassValue": "A String", # Contains value if the data is of java class type.
1415 "label": "A String", # An optional label to display in a dax UI for the element.
1416 "boolValue": True or False, # Contains value if the data is of a boolean type.
1417 "strValue": "A String", # Contains value if the data is of string type.
1418 "durationValue": "A String", # Contains value if the data is of duration type.
1419 "int64Value": "A String", # Contains value if the data is of int64 type.
1420 },
1421 ],
1422 "outputCollectionName": [ # User names for all collection outputs to this transform.
1423 "A String",
1424 ],
1425 "id": "A String", # SDK generated id of this transform instance.
1426 },
1427 ],
1428 "displayData": [ # Pipeline level display data.
1429 { # Data provided with a pipeline or transform to provide descriptive info.
1430 "key": "A String", # The key identifying the display data.
1431 # This is intended to be used as a label for the display data
1432 # when viewed in a dax monitoring system.
1433 "shortStrValue": "A String", # A possible additional shorter value to display.
1434 # For example a java_class_name_value of com.mypackage.MyDoFn
1435 # will be stored with MyDoFn as the short_str_value and
1436 # com.mypackage.MyDoFn as the java_class_name value.
1437 # short_str_value can be displayed and java_class_name_value
1438 # will be displayed as a tooltip.
1439 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1440 "url": "A String", # An optional full URL.
1441 "floatValue": 3.14, # Contains value if the data is of float type.
1442 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1443 # language namespace (i.e. python module) which defines the display data.
1444 # This allows a dax monitoring system to specially handle the data
1445 # and perform custom rendering.
1446 "javaClassValue": "A String", # Contains value if the data is of java class type.
1447 "label": "A String", # An optional label to display in a dax UI for the element.
1448 "boolValue": True or False, # Contains value if the data is of a boolean type.
1449 "strValue": "A String", # Contains value if the data is of string type.
1450 "durationValue": "A String", # Contains value if the data is of duration type.
1451 "int64Value": "A String", # Contains value if the data is of int64 type.
1452 },
1453 ],
1454 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1455 { # Description of the composing transforms, names/ids, and input/outputs of a
1456 # stage of execution. Some composing transforms and sources may have been
1457 # generated by the Dataflow service during execution planning.
1458 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1459 { # Description of an interstitial value between transforms in an execution
1460 # stage.
1461 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1462 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1463 # source is most closely associated.
1464 "name": "A String", # Dataflow service generated name for this source.
1465 },
1466 ],
1467 "kind": "A String", # Type of tranform this stage is executing.
1468 "name": "A String", # Dataflow service generated name for this stage.
1469 "outputSource": [ # Output sources for this stage.
1470 { # Description of an input or output of an execution stage.
1471 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1472 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1473 # source is most closely associated.
1474 "name": "A String", # Dataflow service generated name for this source.
1475 "sizeBytes": "A String", # Size of the source, if measurable.
1476 },
1477 ],
1478 "inputSource": [ # Input sources for this stage.
1479 { # Description of an input or output of an execution stage.
1480 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1481 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1482 # source is most closely associated.
1483 "name": "A String", # Dataflow service generated name for this source.
1484 "sizeBytes": "A String", # Size of the source, if measurable.
1485 },
1486 ],
1487 "componentTransform": [ # Transforms that comprise this execution stage.
1488 { # Description of a transform executed as part of an execution stage.
1489 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1490 "originalTransform": "A String", # User name for the original user transform with which this transform is
1491 # most closely associated.
1492 "name": "A String", # Dataflow service generated name for this source.
1493 },
1494 ],
1495 "id": "A String", # Dataflow service generated id for this stage.
1496 },
1497 ],
1498 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001499 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001500 { # Defines a particular step within a Cloud Dataflow job.
1501 #
1502 # A job consists of multiple steps, each of which performs some
1503 # specific operation as part of the overall job. Data is typically
1504 # passed from one step to another as part of the job.
1505 #
1506 # Here's an example of a sequence of steps which together implement a
1507 # Map-Reduce job:
1508 #
1509 # * Read a collection of data from some source, parsing the
1510 # collection's elements.
1511 #
1512 # * Validate the elements.
1513 #
1514 # * Apply a user-defined function to map each element to some value
1515 # and extract an element-specific key value.
1516 #
1517 # * Group elements with the same key into a single element with
1518 # that key, transforming a multiply-keyed collection into a
1519 # uniquely-keyed collection.
1520 #
1521 # * Write the elements out to some data sink.
1522 #
1523 # Note that the Cloud Dataflow service may be used to run many different
1524 # types of jobs, not just Map-Reduce.
1525 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1526 "properties": { # Named properties associated with the step. Each kind of
1527 # predefined step has its own required set of properties.
1528 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001529 "a_key": "", # Properties of the object.
1530 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001531 "name": "A String", # The name that identifies the step. This must be unique for each
1532 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001533 },
1534 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001535 "currentState": "A String", # The current state of the job.
1536 #
1537 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1538 # specified.
1539 #
1540 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1541 # terminal state. After a job has reached a terminal state, no
1542 # further state updates may be made.
1543 #
1544 # This field may be mutated by the Cloud Dataflow service;
1545 # callers cannot mutate it.
1546 "tempFiles": [ # A set of files the system should be aware of that are used
1547 # for temporary storage. These temporary files will be
1548 # removed on job completion.
1549 # No duplicates are allowed.
1550 # No file patterns are supported.
1551 #
1552 # The supported files are:
1553 #
1554 # Google Cloud Storage:
1555 #
1556 # storage.googleapis.com/{bucket}/{object}
1557 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08001558 "A String",
1559 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001560 "type": "A String", # The type of Cloud Dataflow job.
1561 "id": "A String", # The unique ID of this job.
1562 #
1563 # This field is set by the Cloud Dataflow service when the Job is
1564 # created, and is immutable for the life of the job.
1565 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1566 # of the job it replaced.
1567 #
1568 # When sending a `CreateJobRequest`, you can update a job by specifying it
1569 # here. The job named here is stopped, and its intermediate state is
1570 # transferred to this job.
1571 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1572 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001573 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001574 "a_key": { # Contains information about how a particular
1575 # google.dataflow.v1beta3.Step will be executed.
1576 "stepName": [ # The steps associated with the execution stage.
1577 # Note that stages may have several steps, and that a given step
1578 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001579 "A String",
1580 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001581 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001582 },
1583 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001584 }</pre>
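<p>Illustrative sketch (not part of the generated reference): calling this method with the google-api-python-client library. It assumes the library is installed and Application Default Credentials are configured; the project ID and job ID are placeholders.</p>
<pre>
# Sketch: fetch the current state of a Cloud Dataflow job.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')      # credentials resolved from the environment
job = service.projects().jobs().get(
    projectId='my-project',              # placeholder project ID
    jobId='2017-01-01_00_00_00-1234',    # placeholder job ID
    view='JOB_VIEW_SUMMARY').execute()
print(job['currentState'])
</pre>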
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001585</div>
1586
1587<div class="method">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001588 <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001589 <pre>Request the job status.
1590
1591Args:
Takashi Matsuo06694102015-09-11 13:55:40 -07001592 projectId: string, A project id. (required)
1593 jobId: string, The job to get messages for. (required)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001594 startTime: string, Return only metric data that has changed since this time.
1595Default is to return all information about all metrics for the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001596 location: string, The location which contains the job specified by job_id.
Takashi Matsuo06694102015-09-11 13:55:40 -07001597 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001598 Allowed values
1599 1 - v1 error format
1600 2 - v2 error format
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001601
1602Returns:
1603 An object of the form:
1604
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001605 { # JobMetrics contains a collection of metrics describing the detailed progress
1606 # of a Dataflow job. Metrics correspond to user-defined and system-defined
1607 # metrics in the job.
1608 #
1609 # This resource captures only the most recent values of each metric;
1610 # time-series data can be queried for them (under the same metric names)
1611 # from Cloud Monitoring.
Takashi Matsuo06694102015-09-11 13:55:40 -07001612 "metrics": [ # All metrics for this job.
1613 { # Describes the state of a metric.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001614 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1615 # This holds the count of the aggregated values and is used in combination
1616 # with mean_sum above to obtain the actual mean aggregate value.
1617 # The only possible value type is Long.
1618 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
1619 # "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".
1620 # The specified aggregation kind is case-insensitive.
1621 #
1622 # If omitted, this is not an aggregated value but instead
1623 # a single metric sample value.
1624 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
1625 # possible value type is a list of Values whose type can be Long, Double,
1626 # or String, according to the metric's type. All Values in the list must
1627 # be of the same type.
1628 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1629 # metric.
1630 "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;
1631 # will be "dataflow" for metrics defined by the Dataflow service or SDK.
Takashi Matsuo06694102015-09-11 13:55:40 -07001632 "name": "A String", # Worker-defined metric name.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001633 "context": { # Zero or more labeled fields which identify the part of the job this
1634 # metric is associated with, such as the name of a step or collection.
1635 #
1636 # For example, built-in counters associated with steps will have
1637 # context['step'] = <step-name>. Counters associated with PCollections
1638 # in the SDK will have context['pcollection'] = <pcollection-name>.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001639 "a_key": "A String",
1640 },
1641 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001642 "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1643 # This holds the sum of the aggregated values and is used in combination
1644 # with mean_count below to obtain the actual mean aggregate value.
1645 # The only possible value types are Long and Double.
1646 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1647 # value accumulated since the worker started working on this WorkItem.
1648 # By default this is false, indicating that this metric is reported
1649 # as a delta that is not associated with any WorkItem.
1650 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1651 # reporting work progress; it will be filled in responses from the
1652 # metrics API.
1653 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1654 # "And", and "Or". The possible value types are Long, Double, and Boolean.
1655 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1656 # service.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001657 },
1658 ],
Takashi Matsuo06694102015-09-11 13:55:40 -07001659 "metricTime": "A String", # Timestamp as of which metric values are current.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001660 }</pre>
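<p>Illustrative sketch (not part of the generated reference): retrieving job metrics with the google-api-python-client library and printing the scalar values. Identifiers are placeholders; the fields read ("metrics", "name", "scalar") are those listed in the schema above.</p>
<pre>
# Sketch: list the most recent metric values reported for a Dataflow job.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')               # credentials resolved from the environment
resp = service.projects().jobs().getMetrics(
    projectId='my-project',                       # placeholder project ID
    jobId='2017-01-01_00_00_00-1234').execute()   # placeholder job ID
for metric in resp.get('metrics', []):
    name = metric.get('name', {})
    # "scalar" holds values for the "Sum", "Max", "Min", "And", and "Or"
    # aggregation kinds; other kinds use the mean_* or set fields instead.
    print(name.get('origin'), name.get('name'), metric.get('scalar'))
</pre>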
1661</div>
1662
1663<div class="method">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001664 <code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001665 <pre>List the jobs of a project.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001666
1667Args:
Takashi Matsuo06694102015-09-11 13:55:40 -07001668 projectId: string, The project which owns the jobs. (required)
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001669 pageSize: integer, If there are many jobs, limit response to at most this many.
1670The actual number of jobs returned will be the lesser of max_responses
1671and an unspecified server-defined limit.
Takashi Matsuo06694102015-09-11 13:55:40 -07001672 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001673 Allowed values
1674 1 - v1 error format
1675 2 - v2 error format
1676 pageToken: string, Set this to the 'next_page_token' field of a previous response
1677to request additional results in a long list.
1678 location: string, The location that contains this job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001679 filter: string, The kind of filter to use.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001680 view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001681
1682Returns:
1683 An object of the form:
1684
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001685 { # Response to a request to list Cloud Dataflow jobs. This may be a partial
1686 # response, depending on the page size in the ListJobsRequest.
Takashi Matsuo06694102015-09-11 13:55:40 -07001687 "nextPageToken": "A String", # Set if there may be more results than fit in this response.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001688 "failedLocation": [ # Zero or more messages describing locations that failed to respond.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001689 { # Indicates which location failed to respond to a request for data.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08001690 "name": "A String", # The name of the failed location.
1691 },
1692 ],
Takashi Matsuo06694102015-09-11 13:55:40 -07001693 "jobs": [ # A subset of the requested job information.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001694 { # Defines a job to be run by the Cloud Dataflow service.
1695 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1696 # If this field is set, the service will ensure its uniqueness.
1697 # The request to create a job will fail if the service has knowledge of a
1698 # previously submitted job with the same client's ID and job name.
1699 # The caller may use this field to ensure idempotence of job
1700 # creation across retried attempts to create a job.
1701 # By default, the field is empty and, in that case, the service ignores it.
1702 "requestedState": "A String", # The job's requested state.
1703 #
1704 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1705 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1706 # also be used to directly set a job's requested state to
1707 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1708 # job if it has not already reached a terminal state.
1709 "name": "A String", # The user-specified Cloud Dataflow job name.
1710 #
1711 # Only one Job with a given name may exist in a project at any
1712 # given time. If a caller attempts to create a Job with the same
1713 # name as an already-existing Job, the attempt returns the
1714 # existing Job.
1715 #
1716 # The name must match the regular expression
1717 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1718 "currentStateTime": "A String", # The timestamp associated with the current state.
1719 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1720 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1721 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1722 "labels": { # User-defined labels for this job.
1723 #
1724 # The labels map can contain no more than 64 entries. Entries of the labels
1725 # map are UTF8 strings that comply with the following restrictions:
1726 #
1727 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1728 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1729 # * Both keys and values are additionally constrained to be <= 128 bytes in
1730 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001731 "a_key": "A String",
1732 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001733 "location": "A String", # The location that contains this job.
1734 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1735 # Cloud Dataflow service.
1736 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1737 # corresponding name prefixes of the new job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001738 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001739 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001740 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1741 "version": { # A structure describing which components and their versions of the service
1742 # are required in order to run the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001743 "a_key": "", # Properties of the object.
1744 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001745 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1746 # storage. The system will append the suffix "/temp-{JOBNAME}" to
1747 # this resource prefix, where {JOBNAME} is the value of the
1748 # job_name field. The resulting bucket and object prefix is used
1749 # as the prefix of the resources used to store temporary data
1750 # needed during the job execution. NOTE: This will override the
1751 # value in taskrunner_settings.
1752 # The supported resource type is:
1753 #
1754 # Google Cloud Storage:
1755 #
1756 # storage.googleapis.com/{bucket}/{object}
1757 # bucket.storage.googleapis.com/{object}
Takashi Matsuo06694102015-09-11 13:55:40 -07001758 "internalExperiments": { # Experimental settings.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001759 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001760 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001761 "dataset": "A String", # The dataset for the current project where various workflow
1762 # related tables are stored.
1763 #
1764 # The supported resource type is:
1765 #
1766 # Google BigQuery:
1767 # bigquery.googleapis.com/{dataset}
Takashi Matsuo06694102015-09-11 13:55:40 -07001768 "experiments": [ # The list of experiments to enable.
1769 "A String",
1770 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -07001771 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001772 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1773 # options are passed through the service and are used to recreate the
1774 # SDK pipeline options on the worker in a language agnostic and platform
1775 # independent way.
Takashi Matsuo06694102015-09-11 13:55:40 -07001776 "a_key": "", # Properties of the object.
1777 },
1778 "userAgent": { # A description of the process that generated the request.
1779 "a_key": "", # Properties of the object.
1780 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001781 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1782 # unspecified, the service will attempt to choose a reasonable
1783 # default. This should be in the form of the API service name,
1784 # e.g. "compute.googleapis.com".
1785 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1786 # specified in order for the job to have workers.
1787 { # Describes one particular pool of Cloud Dataflow workers to be
1788 # instantiated by the Cloud Dataflow service in order to perform the
1789 # computations required by a job. Note that a workflow job may use
1790 # multiple pools, in order to match the various computational
1791 # requirements of the various stages of the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001792 "diskSourceImage": "A String", # Fully qualified source image for disks.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001793 "ipConfiguration": "A String", # Configuration for VM IPs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001794 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1795 # are supported.
1796 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1797 # service will attempt to choose a reasonable default.
1798 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1799 # the service will use the network "default".
1800 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1801 # will attempt to choose a reasonable default.
1802 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1803 # attempt to choose a reasonable default.
Takashi Matsuo06694102015-09-11 13:55:40 -07001804 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1805 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001806 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001807 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1808 # Compute Engine API.
1809 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1810 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1811 # `TEARDOWN_NEVER`.
1812 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1813 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1814 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1815 # down.
1816 #
1817 # If the workers are not torn down by the service, they will
1818 # continue to run and use Google Compute Engine VM resources in the
1819 # user's project until they are explicitly terminated by the user.
1820 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1821 # policy except for small, manually supervised test jobs.
1822 #
1823 # If unknown or unspecified, the service will attempt to choose a reasonable
1824 # default.
1825 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1826 # service will choose a number of threads (according to the number of cores
1827 # on the selected machine type for batch, or 1 by convention for streaming).
1828 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1829 # the form "regions/REGION/subnetworks/SUBNETWORK".
Takashi Matsuo06694102015-09-11 13:55:40 -07001830 "poolArgs": { # Extra arguments for this worker pool.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07001831 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001832 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001833 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1834 # execute the job. If zero or unspecified, the service will
1835 # attempt to choose a reasonable default.
1836 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1837 # using the standard Dataflow task runner. Users should ignore
1838 # this field.
1839 "workflowFileName": "A String", # The file to store the workflow in.
1840 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1841 # will not be uploaded.
1842 #
1843 # The supported resource type is:
1844 #
1845 # Google Cloud Storage:
1846 # storage.googleapis.com/{bucket}/{object}
1847 # bucket.storage.googleapis.com/{object}
1848 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1849 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1850 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1851 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1852 "vmId": "A String", # The ID string of the VM.
1853 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1854 # taskrunner; e.g. "wheel".
1855 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1856 # taskrunner; e.g. "root".
1857 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1858 # access the Cloud Dataflow API.
1859 "A String",
1860 ],
1861 "languageHint": "A String", # The suggested backend language.
1862 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1863 # console.
1864 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1865 "logDir": "A String", # The directory on the VM to store logs.
1866 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1867 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1868 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1869 # "shuffle/v1beta1".
1870 "workerId": "A String", # The ID of the worker running this pipeline.
1871 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1872 #
1873 # When workers access Google Cloud APIs, they logically do so via
1874 # relative URLs. If this field is specified, it supplies the base
1875 # URL to use for resolving these relative URLs. The normative
1876 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1877 # Locators".
1878 #
1879 # If not specified, the default value is "http://www.googleapis.com/"
1880 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1881 # "dataflow/v1b3/projects".
1882 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1883 # storage.
1884 #
1885 # The supported resource type is:
1886 #
1887 # Google Cloud Storage:
1888 #
1889 # storage.googleapis.com/{bucket}/{object}
1890 # bucket.storage.googleapis.com/{object}
1891 },
1892 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1893 "harnessCommand": "A String", # The command to launch the worker harness.
1894 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1895 # temporary storage.
1896 #
1897 # The supported resource type is:
1898 #
1899 # Google Cloud Storage:
1900 # storage.googleapis.com/{bucket}/{object}
1901 # bucket.storage.googleapis.com/{object}
1902 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1903 #
1904 # When workers access Google Cloud APIs, they logically do so via
1905 # relative URLs. If this field is specified, it supplies the base
1906 # URL to use for resolving these relative URLs. The normative
1907 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1908 # Locators".
1909 #
1910 # If not specified, the default value is "http://www.googleapis.com/"
1911 },
1912 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1913 # select a default set of packages which are useful to worker
1914 # harnesses written in a particular language.
Takashi Matsuo06694102015-09-11 13:55:40 -07001915 "packages": [ # Packages to be installed on workers.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001916 { # The packages that must be installed in order for a worker to run the
1917 # steps of the Cloud Dataflow job that will be assigned to its worker
1918 # pool.
1919 #
1920 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1921 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1922 # might use this to install jars containing the user's code and all of the
1923 # various dependencies (libraries, data files, etc.) required in order
1924 # for that code to run.
1925 "location": "A String", # The resource to read the package from. The supported resource type is:
1926 #
1927 # Google Cloud Storage:
1928 #
1929 # storage.googleapis.com/{bucket}
1930 # bucket.storage.googleapis.com/
Takashi Matsuo06694102015-09-11 13:55:40 -07001931 "name": "A String", # The name of the package.
Takashi Matsuo06694102015-09-11 13:55:40 -07001932 },
1933 ],
1934 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
Takashi Matsuo06694102015-09-11 13:55:40 -07001935 "algorithm": "A String", # The algorithm to use for autoscaling.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001936 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
Takashi Matsuo06694102015-09-11 13:55:40 -07001937 },
1938 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1939 { # Describes the data disk used by a workflow job.
1940 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001941 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1942 # attempt to choose a reasonable default.
1943 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1944 # must be a disk type appropriate to the project and zone in which
1945 # the workers will run. If unknown or unspecified, the service
1946 # will attempt to choose a reasonable default.
1947 #
1948 # For example, the standard persistent disk type is a resource name
1949 # typically ending in "pd-standard". If SSD persistent disks are
1950 # available, the resource name typically ends with "pd-ssd". The
1951 # actual valid values are defined by the Google Compute Engine API,
1952 # not by the Cloud Dataflow API; consult the Google Compute Engine
1953 # documentation for more information about determining the set of
1954 # available disk types for a particular project and zone.
1955 #
1956 # Google Compute Engine Disk types are local to a particular
1957 # project in a particular zone, and so the resource name will
1958 # typically look something like this:
1959 #
1960 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Takashi Matsuo06694102015-09-11 13:55:40 -07001961 },
1962 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001963 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1964 # attempt to choose a reasonable default.
1965 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1966 # harness, residing in Google Container Registry.
Takashi Matsuo06694102015-09-11 13:55:40 -07001967 },
1968 ],
1969 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001970 "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1971 # form. This data is provided by the Dataflow service for ease of visualizing
1972 # the pipeline and interpreting Dataflow provided metrics.
1973 # A description of the user pipeline and stages through which it is executed.
1974 # Created by the Cloud Dataflow service. Only retrieved with
1975 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1976 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1977 { # Description of the type, names/ids, and input/outputs for a transform.
1978 "kind": "A String", # Type of transform.
1979 "name": "A String", # User provided name for this transform instance.
1980 "inputCollectionName": [ # User names for all collection inputs to this transform.
1981 "A String",
1982 ],
1983 "displayData": [ # Transform-specific display data.
1984 { # Data provided with a pipeline or transform to provide descriptive info.
1985 "key": "A String", # The key identifying the display data.
1986 # This is intended to be used as a label for the display data
1987 # when viewed in a dax monitoring system.
1988 "shortStrValue": "A String", # A possible additional shorter value to display.
1989 # For example a java_class_name_value of com.mypackage.MyDoFn
1990 # will be stored with MyDoFn as the short_str_value and
1991 # com.mypackage.MyDoFn as the java_class_name value.
1992 # short_str_value can be displayed and java_class_name_value
1993 # will be displayed as a tooltip.
1994 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1995 "url": "A String", # An optional full URL.
1996 "floatValue": 3.14, # Contains value if the data is of float type.
1997 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1998 # language namespace (e.g. a Python module) which defines the display data.
1999 # This allows a dax monitoring system to specially handle the data
2000 # and perform custom rendering.
2001 "javaClassValue": "A String", # Contains value if the data is of java class type.
2002 "label": "A String", # An optional label to display in a dax UI for the element.
2003 "boolValue": True or False, # Contains value if the data is of a boolean type.
2004 "strValue": "A String", # Contains value if the data is of string type.
2005 "durationValue": "A String", # Contains value if the data is of duration type.
2006 "int64Value": "A String", # Contains value if the data is of int64 type.
2007 },
2008 ],
2009 "outputCollectionName": [ # User names for all collection outputs to this transform.
2010 "A String",
2011 ],
2012 "id": "A String", # SDK generated id of this transform instance.
2013 },
2014 ],
2015 "displayData": [ # Pipeline level display data.
2016 { # Data provided with a pipeline or transform to provide descriptive info.
2017 "key": "A String", # The key identifying the display data.
2018 # This is intended to be used as a label for the display data
2019 # when viewed in a dax monitoring system.
2020 "shortStrValue": "A String", # A possible additional shorter value to display.
2021 # For example a java_class_name_value of com.mypackage.MyDoFn
2022 # will be stored with MyDoFn as the short_str_value and
2023 # com.mypackage.MyDoFn as the java_class_name value.
2024 # short_str_value can be displayed and java_class_name_value
2025 # will be displayed as a tooltip.
2026 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2027 "url": "A String", # An optional full URL.
2028 "floatValue": 3.14, # Contains value if the data is of float type.
2029 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2030 # language namespace (e.g. a Python module) which defines the display data.
2031 # This allows a dax monitoring system to specially handle the data
2032 # and perform custom rendering.
2033 "javaClassValue": "A String", # Contains value if the data is of java class type.
2034 "label": "A String", # An optional label to display in a dax UI for the element.
2035 "boolValue": True or False, # Contains value if the data is of a boolean type.
2036 "strValue": "A String", # Contains value if the data is of string type.
2037 "durationValue": "A String", # Contains value if the data is of duration type.
2038 "int64Value": "A String", # Contains value if the data is of int64 type.
2039 },
2040 ],
2041 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2042 { # Description of the composing transforms, names/ids, and input/outputs of a
2043 # stage of execution. Some composing transforms and sources may have been
2044 # generated by the Dataflow service during execution planning.
2045 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2046 { # Description of an interstitial value between transforms in an execution
2047 # stage.
2048 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2049 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2050 # source is most closely associated.
2051 "name": "A String", # Dataflow service generated name for this source.
2052 },
2053 ],
2054 "kind": "A String", # Type of tranform this stage is executing.
2055 "name": "A String", # Dataflow service generated name for this stage.
2056 "outputSource": [ # Output sources for this stage.
2057 { # Description of an input or output of an execution stage.
2058 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2059 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2060 # source is most closely associated.
2061 "name": "A String", # Dataflow service generated name for this source.
2062 "sizeBytes": "A String", # Size of the source, if measurable.
2063 },
2064 ],
2065 "inputSource": [ # Input sources for this stage.
2066 { # Description of an input or output of an execution stage.
2067 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2068 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2069 # source is most closely associated.
2070 "name": "A String", # Dataflow service generated name for this source.
2071 "sizeBytes": "A String", # Size of the source, if measurable.
2072 },
2073 ],
2074 "componentTransform": [ # Transforms that comprise this execution stage.
2075 { # Description of a transform executed as part of an execution stage.
2076 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2077 "originalTransform": "A String", # User name for the original user transform with which this transform is
2078 # most closely associated.
2079 "name": "A String", # Dataflow service generated name for this source.
2080 },
2081 ],
2082 "id": "A String", # Dataflow service generated id for this stage.
2083 },
2084 ],
2085 },
Takashi Matsuo06694102015-09-11 13:55:40 -07002086 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002087 { # Defines a particular step within a Cloud Dataflow job.
2088 #
2089 # A job consists of multiple steps, each of which performs some
2090 # specific operation as part of the overall job. Data is typically
2091 # passed from one step to another as part of the job.
2092 #
2093 # Here's an example of a sequence of steps which together implement a
2094 # Map-Reduce job:
2095 #
2096 # * Read a collection of data from some source, parsing the
2097 # collection's elements.
2098 #
2099 # * Validate the elements.
2100 #
2101 # * Apply a user-defined function to map each element to some value
2102 # and extract an element-specific key value.
2103 #
2104 # * Group elements with the same key into a single element with
2105 # that key, transforming a multiply-keyed collection into a
2106 # uniquely-keyed collection.
2107 #
2108 # * Write the elements out to some data sink.
2109 #
2110 # Note that the Cloud Dataflow service may be used to run many different
2111 # types of jobs, not just Map-Reduce.
2112 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2113 "properties": { # Named properties associated with the step. Each kind of
2114 # predefined step has its own required set of properties.
2115 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002116 "a_key": "", # Properties of the object.
2117 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002118 "name": "A String", # The name that identifies the step. This must be unique for each
2119 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002120 },
2121 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002122 "currentState": "A String", # The current state of the job.
2123 #
2124 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2125 # specified.
2126 #
2127 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2128 # terminal state. After a job has reached a terminal state, no
2129 # further state updates may be made.
2130 #
2131 # This field may be mutated by the Cloud Dataflow service;
2132 # callers cannot mutate it.
2133 "tempFiles": [ # A set of files the system should be aware of that are used
2134 # for temporary storage. These temporary files will be
2135 # removed on job completion.
2136 # No duplicates are allowed.
2137 # No file patterns are supported.
2138 #
2139 # The supported files are:
2140 #
2141 # Google Cloud Storage:
2142 #
2143 # storage.googleapis.com/{bucket}/{object}
2144 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08002145 "A String",
2146 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002147 "type": "A String", # The type of Cloud Dataflow job.
2148 "id": "A String", # The unique ID of this job.
2149 #
2150 # This field is set by the Cloud Dataflow service when the Job is
2151 # created, and is immutable for the life of the job.
2152 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2153 # of the job it replaced.
2154 #
2155 # When sending a `CreateJobRequest`, you can update a job by specifying it
2156 # here. The job named here is stopped, and its intermediate state is
2157 # transferred to this job.
2158 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2159 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002160 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002161 "a_key": { # Contains information about how a particular
2162 # google.dataflow.v1beta3.Step will be executed.
2163 "stepName": [ # The steps associated with the execution stage.
2164 # Note that stages may have several steps, and that a given step
2165 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002166 "A String",
2167 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002168 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002169 },
2170 },
2171 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002172 ],
2173 }</pre>
2174</div>
2175
2176<div class="method">
2177 <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2178 <pre>Retrieves the next page of results.
2179
2180Args:
2181 previous_request: The request for the previous page. (required)
2182 previous_response: The response from the request for the previous page. (required)
2183
2184Returns:
2185 A request object that you can call 'execute()' on to request the next
2186 page. Returns None if there are no more items in the collection.
2187 </pre>
2188</div>
2189
2190<div class="method">
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002191 <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002192 <pre>Updates the state of an existing Cloud Dataflow job.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002193
2194Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002195 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2196 jobId: string, The job ID. (required)
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002197 body: object, The request body. (required)
2198 The object takes the form of:
2199
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002200{ # Defines a job to be run by the Cloud Dataflow service.
2201 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2202 # If this field is set, the service will ensure its uniqueness.
2203 # The request to create a job will fail if the service has knowledge of a
2204 # previously submitted job with the same client's ID and job name.
2205 # The caller may use this field to ensure idempotence of job
2206 # creation across retried attempts to create a job.
2207 # By default, the field is empty and, in that case, the service ignores it.
2208 "requestedState": "A String", # The job's requested state.
2209 #
2210 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2211 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2212 # also be used to directly set a job's requested state to
2213 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2214 # job if it has not already reached a terminal state.
2215 "name": "A String", # The user-specified Cloud Dataflow job name.
2216 #
2217 # Only one Job with a given name may exist in a project at any
2218 # given time. If a caller attempts to create a Job with the same
2219 # name as an already-existing Job, the attempt returns the
2220 # existing Job.
2221 #
2222 # The name must match the regular expression
2223 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2224 "currentStateTime": "A String", # The timestamp associated with the current state.
2225 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2226 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2227 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2228 "labels": { # User-defined labels for this job.
2229 #
2230 # The labels map can contain no more than 64 entries. Entries of the labels
2231 # map are UTF8 strings that comply with the following restrictions:
2232 #
2233 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2234 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2235 # * Both keys and values are additionally constrained to be <= 128 bytes in
2236 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002237 "a_key": "A String",
2238 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002239 "location": "A String", # The location that contains this job.
2240 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2241 # Cloud Dataflow service.
2242 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2243 # corresponding name prefixes of the new job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002244 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002245 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002246 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2247 "version": { # A structure describing which components and their versions of the service
2248 # are required in order to run the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002249 "a_key": "", # Properties of the object.
2250 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002251 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2252 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2253 # this resource prefix, where {JOBNAME} is the value of the
2254 # job_name field. The resulting bucket and object prefix is used
2255 # as the prefix of the resources used to store temporary data
2256 # needed during the job execution. NOTE: This will override the
2257 # value in taskrunner_settings.
2258 # The supported resource type is:
2259 #
2260 # Google Cloud Storage:
2261 #
2262 # storage.googleapis.com/{bucket}/{object}
2263 # bucket.storage.googleapis.com/{object}
Takashi Matsuo06694102015-09-11 13:55:40 -07002264 "internalExperiments": { # Experimental settings.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002265 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002266 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002267 "dataset": "A String", # The dataset for the current project where various workflow
2268 # related tables are stored.
2269 #
2270 # The supported resource type is:
2271 #
2272 # Google BigQuery:
2273 # bigquery.googleapis.com/{dataset}
Takashi Matsuo06694102015-09-11 13:55:40 -07002274 "experiments": [ # The list of experiments to enable.
2275 "A String",
2276 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -07002277 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002278 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2279 # options are passed through the service and are used to recreate the
2280 # SDK pipeline options on the worker in a language agnostic and platform
2281 # independent way.
Takashi Matsuo06694102015-09-11 13:55:40 -07002282 "a_key": "", # Properties of the object.
2283 },
2284 "userAgent": { # A description of the process that generated the request.
2285 "a_key": "", # Properties of the object.
2286 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002287 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2288 # unspecified, the service will attempt to choose a reasonable
2289 # default. This should be in the form of the API service name,
2290 # e.g. "compute.googleapis.com".
2291 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2292 # specified in order for the job to have workers.
2293 { # Describes one particular pool of Cloud Dataflow workers to be
2294 # instantiated by the Cloud Dataflow service in order to perform the
2295 # computations required by a job. Note that a workflow job may use
2296 # multiple pools, in order to match the various computational
2297 # requirements of the various stages of the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002298 "diskSourceImage": "A String", # Fully qualified source image for disks.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002299 "ipConfiguration": "A String", # Configuration for VM IPs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002300 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2301 # are supported.
2302 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2303 # service will attempt to choose a reasonable default.
2304 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2305 # the service will use the network "default".
2306 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2307 # will attempt to choose a reasonable default.
2308 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2309 # attempt to choose a reasonable default.
Takashi Matsuo06694102015-09-11 13:55:40 -07002310 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2311 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002312 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002313 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2314 # Compute Engine API.
2315 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2316 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2317 # `TEARDOWN_NEVER`.
2318 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2319 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2320 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2321 # down.
2322 #
2323 # If the workers are not torn down by the service, they will
2324 # continue to run and use Google Compute Engine VM resources in the
2325 # user's project until they are explicitly terminated by the user.
2326 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2327 # policy except for small, manually supervised test jobs.
2328 #
2329 # If unknown or unspecified, the service will attempt to choose a reasonable
2330 # default.
2331 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2332 # service will choose a number of threads (according to the number of cores
2333 # on the selected machine type for batch, or 1 by convention for streaming).
2334 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2335 # the form "regions/REGION/subnetworks/SUBNETWORK".
Takashi Matsuo06694102015-09-11 13:55:40 -07002336 "poolArgs": { # Extra arguments for this worker pool.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002337 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002338 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002339 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2340 # execute the job. If zero or unspecified, the service will
2341 # attempt to choose a reasonable default.
2342 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2343 # using the standard Dataflow task runner. Users should ignore
2344 # this field.
2345 "workflowFileName": "A String", # The file to store the workflow in.
2346 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2347 # will not be uploaded.
2348 #
2349 # The supported resource type is:
2350 #
2351 # Google Cloud Storage:
2352 # storage.googleapis.com/{bucket}/{object}
2353 # bucket.storage.googleapis.com/{object}
2354 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2355 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2356 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2357 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2358 "vmId": "A String", # The ID string of the VM.
2359 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2360 # taskrunner; e.g. "wheel".
2361 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2362 # taskrunner; e.g. "root".
2363 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2364 # access the Cloud Dataflow API.
2365 "A String",
2366 ],
2367 "languageHint": "A String", # The suggested backend language.
2368 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2369 # console.
2370 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2371 "logDir": "A String", # The directory on the VM to store logs.
2372 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2373 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2374 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2375 # "shuffle/v1beta1".
2376 "workerId": "A String", # The ID of the worker running this pipeline.
2377 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2378 #
2379 # When workers access Google Cloud APIs, they logically do so via
2380 # relative URLs. If this field is specified, it supplies the base
2381 # URL to use for resolving these relative URLs. The normative
2382 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2383 # Locators".
2384 #
2385 # If not specified, the default value is "http://www.googleapis.com/"
2386 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2387 # "dataflow/v1b3/projects".
2388 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2389 # storage.
2390 #
2391 # The supported resource type is:
2392 #
2393 # Google Cloud Storage:
2394 #
2395 # storage.googleapis.com/{bucket}/{object}
2396 # bucket.storage.googleapis.com/{object}
2397 },
2398 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2399 "harnessCommand": "A String", # The command to launch the worker harness.
2400 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2401 # temporary storage.
2402 #
2403 # The supported resource type is:
2404 #
2405 # Google Cloud Storage:
2406 # storage.googleapis.com/{bucket}/{object}
2407 # bucket.storage.googleapis.com/{object}
2408 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2409 #
2410 # When workers access Google Cloud APIs, they logically do so via
2411 # relative URLs. If this field is specified, it supplies the base
2412 # URL to use for resolving these relative URLs. The normative
2413 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2414 # Locators".
2415 #
2416 # If not specified, the default value is "http://www.googleapis.com/"
2417 },
2418 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2419 # select a default set of packages which are useful to worker
2420 # harnesses written in a particular language.
Takashi Matsuo06694102015-09-11 13:55:40 -07002421 "packages": [ # Packages to be installed on workers.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002422 { # The packages that must be installed in order for a worker to run the
2423 # steps of the Cloud Dataflow job that will be assigned to its worker
2424 # pool.
2425 #
2426 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2427 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2428 # might use this to install jars containing the user's code and all of the
2429 # various dependencies (libraries, data files, etc.) required in order
2430 # for that code to run.
2431 "location": "A String", # The resource to read the package from. The supported resource type is:
2432 #
2433 # Google Cloud Storage:
2434 #
2435 # storage.googleapis.com/{bucket}
2436 # bucket.storage.googleapis.com/
Takashi Matsuo06694102015-09-11 13:55:40 -07002437 "name": "A String", # The name of the package.
Takashi Matsuo06694102015-09-11 13:55:40 -07002438 },
2439 ],
2440 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
Takashi Matsuo06694102015-09-11 13:55:40 -07002441 "algorithm": "A String", # The algorithm to use for autoscaling.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002442 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
Takashi Matsuo06694102015-09-11 13:55:40 -07002443 },
2444 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2445 { # Describes the data disk used by a workflow job.
2446 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002447 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2448 # attempt to choose a reasonable default.
2449 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2450 # must be a disk type appropriate to the project and zone in which
2451 # the workers will run. If unknown or unspecified, the service
2452 # will attempt to choose a reasonable default.
2453 #
2454 # For example, the standard persistent disk type is a resource name
2455 # typically ending in "pd-standard". If SSD persistent disks are
2456 # available, the resource name typically ends with "pd-ssd". The
2457 # actual valid values are defined by the Google Compute Engine API,
2458 # not by the Cloud Dataflow API; consult the Google Compute Engine
2459 # documentation for more information about determining the set of
2460 # available disk types for a particular project and zone.
2461 #
2462 # Google Compute Engine Disk types are local to a particular
2463 # project in a particular zone, and so the resource name will
2464 # typically look something like this:
2465 #
2466 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Takashi Matsuo06694102015-09-11 13:55:40 -07002467 },
2468 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002469 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
2470 # attempt to choose a reasonable default.
2471 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2472 # harness, residing in Google Container Registry.
Takashi Matsuo06694102015-09-11 13:55:40 -07002473 },
2474 ],
2475 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002476 "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2477 # form. This data is provided by the Dataflow service for ease of visualizing
2478 # the pipeline and interpreting Dataflow provided metrics.
2479 # A description of the user pipeline and stages through which it is executed.
2480 # Created by the Cloud Dataflow service. Only retrieved with
2481 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2482 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2483 { # Description of the type, names/ids, and input/outputs for a transform.
2484 "kind": "A String", # Type of transform.
2485 "name": "A String", # User provided name for this transform instance.
2486 "inputCollectionName": [ # User names for all collection inputs to this transform.
2487 "A String",
2488 ],
2489 "displayData": [ # Transform-specific display data.
2490 { # Data provided with a pipeline or transform to provide descriptive info.
2491 "key": "A String", # The key identifying the display data.
2492 # This is intended to be used as a label for the display data
2493 # when viewed in a dax monitoring system.
2494 "shortStrValue": "A String", # A possible additional shorter value to display.
2495 # For example a java_class_name_value of com.mypackage.MyDoFn
2496 # will be stored with MyDoFn as the short_str_value and
2497 # com.mypackage.MyDoFn as the java_class_name value.
2498 # short_str_value can be displayed and java_class_name_value
2499 # will be displayed as a tooltip.
2500 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2501 "url": "A String", # An optional full URL.
2502 "floatValue": 3.14, # Contains value if the data is of float type.
2503 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2504 # language namespace (e.g. a Python module) which defines the display data.
2505 # This allows a dax monitoring system to specially handle the data
2506 # and perform custom rendering.
2507 "javaClassValue": "A String", # Contains value if the data is of java class type.
2508 "label": "A String", # An optional label to display in a dax UI for the element.
2509 "boolValue": True or False, # Contains value if the data is of a boolean type.
2510 "strValue": "A String", # Contains value if the data is of string type.
2511 "durationValue": "A String", # Contains value if the data is of duration type.
2512 "int64Value": "A String", # Contains value if the data is of int64 type.
2513 },
2514 ],
2515 "outputCollectionName": [ # User names for all collection outputs to this transform.
2516 "A String",
2517 ],
2518 "id": "A String", # SDK generated id of this transform instance.
2519 },
2520 ],
2521 "displayData": [ # Pipeline level display data.
2522 { # Data provided with a pipeline or transform to provide descriptive info.
2523 "key": "A String", # The key identifying the display data.
2524 # This is intended to be used as a label for the display data
2525 # when viewed in a dax monitoring system.
2526 "shortStrValue": "A String", # A possible additional shorter value to display.
2527 # For example a java_class_name_value of com.mypackage.MyDoFn
2528 # will be stored with MyDoFn as the short_str_value and
2529 # com.mypackage.MyDoFn as the java_class_name value.
2530 # short_str_value can be displayed and java_class_name_value
2531 # will be displayed as a tooltip.
2532 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2533 "url": "A String", # An optional full URL.
2534 "floatValue": 3.14, # Contains value if the data is of float type.
2535 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2536 # language namespace (e.g. a Python module) which defines the display data.
2537 # This allows a dax monitoring system to specially handle the data
2538 # and perform custom rendering.
2539 "javaClassValue": "A String", # Contains value if the data is of java class type.
2540 "label": "A String", # An optional label to display in a dax UI for the element.
2541 "boolValue": True or False, # Contains value if the data is of a boolean type.
2542 "strValue": "A String", # Contains value if the data is of string type.
2543 "durationValue": "A String", # Contains value if the data is of duration type.
2544 "int64Value": "A String", # Contains value if the data is of int64 type.
2545 },
2546 ],
2547 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2548 { # Description of the composing transforms, names/ids, and input/outputs of a
2549 # stage of execution. Some composing transforms and sources may have been
2550 # generated by the Dataflow service during execution planning.
2551 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2552 { # Description of an interstitial value between transforms in an execution
2553 # stage.
2554 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2555 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2556 # source is most closely associated.
2557 "name": "A String", # Dataflow service generated name for this source.
2558 },
2559 ],
2560 "kind": "A String", # Type of tranform this stage is executing.
2561 "name": "A String", # Dataflow service generated name for this stage.
2562 "outputSource": [ # Output sources for this stage.
2563 { # Description of an input or output of an execution stage.
2564 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2565 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2566 # source is most closely associated.
2567 "name": "A String", # Dataflow service generated name for this source.
2568 "sizeBytes": "A String", # Size of the source, if measurable.
2569 },
2570 ],
2571 "inputSource": [ # Input sources for this stage.
2572 { # Description of an input or output of an execution stage.
2573 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2574 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2575 # source is most closely associated.
2576 "name": "A String", # Dataflow service generated name for this source.
2577 "sizeBytes": "A String", # Size of the source, if measurable.
2578 },
2579 ],
2580 "componentTransform": [ # Transforms that comprise this execution stage.
2581 { # Description of a transform executed as part of an execution stage.
2582 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2583 "originalTransform": "A String", # User name for the original user transform with which this transform is
2584 # most closely associated.
2585 "name": "A String", # Dataflow service generated name for this source.
2586 },
2587 ],
2588 "id": "A String", # Dataflow service generated id for this stage.
2589 },
2590 ],
2591 },
Takashi Matsuo06694102015-09-11 13:55:40 -07002592 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002593 { # Defines a particular step within a Cloud Dataflow job.
2594 #
2595 # A job consists of multiple steps, each of which performs some
2596 # specific operation as part of the overall job. Data is typically
2597 # passed from one step to another as part of the job.
2598 #
2599 # Here's an example of a sequence of steps which together implement a
2600 # Map-Reduce job:
2601 #
2602 # * Read a collection of data from some source, parsing the
2603 # collection's elements.
2604 #
2605 # * Validate the elements.
2606 #
2607 # * Apply a user-defined function to map each element to some value
2608 # and extract an element-specific key value.
2609 #
2610 # * Group elements with the same key into a single element with
2611 # that key, transforming a multiply-keyed collection into a
2612 # uniquely-keyed collection.
2613 #
2614 # * Write the elements out to some data sink.
2615 #
2616 # Note that the Cloud Dataflow service may be used to run many different
2617 # types of jobs, not just Map-Reduce.
2618 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2619 "properties": { # Named properties associated with the step. Each kind of
2620 # predefined step has its own required set of properties.
2621 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002622 "a_key": "", # Properties of the object.
2623 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002624 "name": "A String", # The name that identifies the step. This must be unique for each
2625 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002626 },
2627 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002628 "currentState": "A String", # The current state of the job.
2629 #
2630 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2631 # specified.
2632 #
2633 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2634 # terminal state. After a job has reached a terminal state, no
2635 # further state updates may be made.
2636 #
2637 # This field may be mutated by the Cloud Dataflow service;
2638 # callers cannot mutate it.
2639 "tempFiles": [ # A set of files the system should be aware of that are used
2640 # for temporary storage. These temporary files will be
2641 # removed on job completion.
2642 # No duplicates are allowed.
2643 # No file patterns are supported.
2644 #
2645 # The supported files are:
2646 #
2647 # Google Cloud Storage:
2648 #
2649 # storage.googleapis.com/{bucket}/{object}
2650 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08002651 "A String",
2652 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002653 "type": "A String", # The type of Cloud Dataflow job.
2654 "id": "A String", # The unique ID of this job.
2655 #
2656 # This field is set by the Cloud Dataflow service when the Job is
2657 # created, and is immutable for the life of the job.
2658 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2659 # of the job it replaced.
2660 #
2661 # When sending a `CreateJobRequest`, you can update a job by specifying it
2662 # here. The job named here is stopped, and its intermediate state is
2663 # transferred to this job.
2664 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2665 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002666 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002667 "a_key": { # Contains information about how a particular
2668 # google.dataflow.v1beta3.Step will be executed.
2669 "stepName": [ # The steps associated with the execution stage.
2670 # Note that stages may have several steps, and that a given step
2671 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002672 "A String",
2673 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002674 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002675 },
2676 },
Takashi Matsuo06694102015-09-11 13:55:40 -07002677 }
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002678
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002679 location: string, The location that contains this job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002680 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002681 Allowed values
2682 1 - v1 error format
2683 2 - v2 error format
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002684
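  A minimal usage sketch (assuming the discovery-based Python client,
  googleapiclient, and placeholder project/job IDs) that requests cancellation
  by setting requestedState as described above:

    from googleapiclient.discovery import build

    dataflow = build('dataflow', 'v1b3')  # assumes default credentials
    body = {'requestedState': 'JOB_STATE_CANCELLED'}  # irrevocably terminates the job
    result = dataflow.projects().jobs().update(
        projectId='my-project', jobId='my-job-id', body=body).execute()
    print(result.get('currentState'))
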
2685Returns:
2686 An object of the form:
2687
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002688 { # Defines a job to be run by the Cloud Dataflow service.
2689 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2690 # If this field is set, the service will ensure its uniqueness.
2691 # The request to create a job will fail if the service has knowledge of a
2692 # previously submitted job with the same client's ID and job name.
2693 # The caller may use this field to ensure idempotence of job
2694 # creation across retried attempts to create a job.
2695 # By default, the field is empty and, in that case, the service ignores it.
2696 "requestedState": "A String", # The job's requested state.
2697 #
2698 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2699 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2700 # also be used to directly set a job's requested state to
2701 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2702 # job if it has not already reached a terminal state.
2703 "name": "A String", # The user-specified Cloud Dataflow job name.
2704 #
2705 # Only one Job with a given name may exist in a project at any
2706 # given time. If a caller attempts to create a Job with the same
2707 # name as an already-existing Job, the attempt returns the
2708 # existing Job.
2709 #
2710 # The name must match the regular expression
2711 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2712 "currentStateTime": "A String", # The timestamp associated with the current state.
2713 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2714 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2715 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2716 "labels": { # User-defined labels for this job.
2717 #
2718 # The labels map can contain no more than 64 entries. Entries of the labels
2719 # map are UTF8 strings that comply with the following restrictions:
2720 #
2721 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2722 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2723 # * Both keys and values are additionally constrained to be <= 128 bytes in
2724 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002725 "a_key": "A String",
2726 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002727 "location": "A String", # The location that contains this job.
2728 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2729 # Cloud Dataflow service.
2730 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2731 # corresponding name prefixes of the new job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002732 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002733 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002734 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2735 "version": { # A structure describing which components and their versions of the service
2736 # are required in order to run the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002737 "a_key": "", # Properties of the object.
2738 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002739 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2740 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2741 # this resource prefix, where {JOBNAME} is the value of the
2742 # job_name field. The resulting bucket and object prefix is used
2743 # as the prefix of the resources used to store temporary data
2744 # needed during the job execution. NOTE: This will override the
2745 # value in taskrunner_settings.
2746 # The supported resource type is:
2747 #
2748 # Google Cloud Storage:
2749 #
2750 # storage.googleapis.com/{bucket}/{object}
2751 # bucket.storage.googleapis.com/{object}
Takashi Matsuo06694102015-09-11 13:55:40 -07002752 "internalExperiments": { # Experimental settings.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002753 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002754 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002755 "dataset": "A String", # The dataset for the current project where various workflow
2756 # related tables are stored.
2757 #
2758 # The supported resource type is:
2759 #
2760 # Google BigQuery:
2761 # bigquery.googleapis.com/{dataset}
Takashi Matsuo06694102015-09-11 13:55:40 -07002762 "experiments": [ # The list of experiments to enable.
2763 "A String",
2764 ],
Sai Cheemalapatiea3a5e12016-10-12 14:05:53 -07002765 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002766 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2767 # options are passed through the service and are used to recreate the
2768 # SDK pipeline options on the worker in a language agnostic and platform
2769 # independent way.
Takashi Matsuo06694102015-09-11 13:55:40 -07002770 "a_key": "", # Properties of the object.
2771 },
2772 "userAgent": { # A description of the process that generated the request.
2773 "a_key": "", # Properties of the object.
2774 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002775 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2776 # unspecified, the service will attempt to choose a reasonable
2777 # default. This should be in the form of the API service name,
2778 # e.g. "compute.googleapis.com".
2779 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2780 # specified in order for the job to have workers.
2781 { # Describes one particular pool of Cloud Dataflow workers to be
2782 # instantiated by the Cloud Dataflow service in order to perform the
2783 # computations required by a job. Note that a workflow job may use
2784 # multiple pools, in order to match the various computational
2785 # requirements of the various stages of the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07002786 "diskSourceImage": "A String", # Fully qualified source image for disks.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002787 "ipConfiguration": "A String", # Configuration for VM IPs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002788 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2789 # are supported.
2790 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2791 # service will attempt to choose a reasonable default.
2792 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2793 # the service will use the network "default".
2794 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2795 # will attempt to choose a reasonable default.
2796 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2797 # attempt to choose a reasonable default.
Takashi Matsuo06694102015-09-11 13:55:40 -07002798 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2799 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002800 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002801 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2802 # Compute Engine API.
2803 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2804 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2805 # `TEARDOWN_NEVER`.
2806 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2807 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2808 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2809 # down.
2810 #
2811 # If the workers are not torn down by the service, they will
2812 # continue to run and use Google Compute Engine VM resources in the
2813 # user's project until they are explicitly terminated by the user.
2814 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2815 # policy except for small, manually supervised test jobs.
2816 #
2817 # If unknown or unspecified, the service will attempt to choose a reasonable
2818 # default.
2819 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2820 # service will choose a number of threads (according to the number of cores
2821 # on the selected machine type for batch, or 1 by convention for streaming).
2822 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2823 # the form "regions/REGION/subnetworks/SUBNETWORK".
Takashi Matsuo06694102015-09-11 13:55:40 -07002824 "poolArgs": { # Extra arguments for this worker pool.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -07002825 "a_key": "", # Properties of the object. Contains field @type with type URL.
Takashi Matsuo06694102015-09-11 13:55:40 -07002826 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002827 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2828 # execute the job. If zero or unspecified, the service will
2829 # attempt to choose a reasonable default.
2830 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2831 # using the standard Dataflow task runner. Users should ignore
2832 # this field.
2833 "workflowFileName": "A String", # The file to store the workflow in.
2834 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2835 # will not be uploaded.
2836 #
2837 # The supported resource type is:
2838 #
2839 # Google Cloud Storage:
2840 # storage.googleapis.com/{bucket}/{object}
2841 # bucket.storage.googleapis.com/{object}
2842 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2843 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2844 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2845 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2846 "vmId": "A String", # The ID string of the VM.
2847 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2848 # taskrunner; e.g. "wheel".
2849 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2850 # taskrunner; e.g. "root".
2851 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2852 # access the Cloud Dataflow API.
2853 "A String",
2854 ],
2855 "languageHint": "A String", # The suggested backend language.
2856 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2857 # console.
2858 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2859 "logDir": "A String", # The directory on the VM to store logs.
2860 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2861 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2862 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2863 # "shuffle/v1beta1".
2864 "workerId": "A String", # The ID of the worker running this pipeline.
2865 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2866 #
2867 # When workers access Google Cloud APIs, they logically do so via
2868 # relative URLs. If this field is specified, it supplies the base
2869 # URL to use for resolving these relative URLs. The normative
2870 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2871 # Locators".
2872 #
2873 # If not specified, the default value is "http://www.googleapis.com/"
2874 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2875 # "dataflow/v1b3/projects".
2876 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2877 # storage.
2878 #
2879 # The supported resource type is:
2880 #
2881 # Google Cloud Storage:
2882 #
2883 # storage.googleapis.com/{bucket}/{object}
2884 # bucket.storage.googleapis.com/{object}
2885 },
2886 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2887 "harnessCommand": "A String", # The command to launch the worker harness.
2888 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2889 # temporary storage.
2890 #
2891 # The supported resource type is:
2892 #
2893 # Google Cloud Storage:
2894 # storage.googleapis.com/{bucket}/{object}
2895 # bucket.storage.googleapis.com/{object}
2896 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2897 #
2898 # When workers access Google Cloud APIs, they logically do so via
2899 # relative URLs. If this field is specified, it supplies the base
2900 # URL to use for resolving these relative URLs. The normative
2901 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2902 # Locators".
2903 #
2904 # If not specified, the default value is "http://www.googleapis.com/"
2905 },
2906 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2907 # select a default set of packages which are useful to worker
2908 # harnesses written in a particular language.
Takashi Matsuo06694102015-09-11 13:55:40 -07002909 "packages": [ # Packages to be installed on workers.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002910 { # The packages that must be installed in order for a worker to run the
2911 # steps of the Cloud Dataflow job that will be assigned to its worker
2912 # pool.
2913 #
2914 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2915 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2916 # might use this to install jars containing the user's code and all of the
2917 # various dependencies (libraries, data files, etc.) required in order
2918 # for that code to run.
2919 "location": "A String", # The resource to read the package from. The supported resource type is:
2920 #
2921 # Google Cloud Storage:
2922 #
2923 # storage.googleapis.com/{bucket}
2924 # bucket.storage.googleapis.com/
Takashi Matsuo06694102015-09-11 13:55:40 -07002925 "name": "A String", # The name of the package.
Takashi Matsuo06694102015-09-11 13:55:40 -07002926 },
2927 ],
2928 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
Takashi Matsuo06694102015-09-11 13:55:40 -07002929 "algorithm": "A String", # The algorithm to use for autoscaling.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002930 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
Takashi Matsuo06694102015-09-11 13:55:40 -07002931 },
2932 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2933 { # Describes the data disk used by a workflow job.
2934 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002935 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2936 # attempt to choose a reasonable default.
2937 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2938 # must be a disk type appropriate to the project and zone in which
2939 # the workers will run. If unknown or unspecified, the service
2940 # will attempt to choose a reasonable default.
2941 #
2942 # For example, the standard persistent disk type is a resource name
2943 # typically ending in "pd-standard". If SSD persistent disks are
2944 # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
        },
      ],
    },
    "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
        # A descriptive representation of the submitted pipeline as well as the executed
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
        # A description of the user pipeline and stages through which it is executed.
        # Created by the Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2970 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2971 { # Description of the type, names/ids, and input/outputs for a transform.
2972 "kind": "A String", # Type of transform.
2973 "name": "A String", # User provided name for this transform instance.
2974 "inputCollectionName": [ # User names for all collection inputs to this transform.
2975 "A String",
2976 ],
2977 "displayData": [ # Transform-specific display data.
2978 { # Data provided with a pipeline or transform to provide descriptive info.
2979 "key": "A String", # The key identifying the display data.
2980 # This is intended to be used as a label for the display data
2981 # when viewed in a dax monitoring system.
2982 "shortStrValue": "A String", # A possible additional shorter value to display.
2983 # For example a java_class_name_value of com.mypackage.MyDoFn
2984 # will be stored with MyDoFn as the short_str_value and
2985 # com.mypackage.MyDoFn as the java_class_name value.
2986 # short_str_value can be displayed and java_class_name_value
2987 # will be displayed as a tooltip.
2988 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2989 "url": "A String", # An optional full URL.
2990 "floatValue": 3.14, # Contains value if the data is of float type.
2991 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2992 # language namespace (i.e. python module) which defines the display data.
2993 # This allows a dax monitoring system to specially handle the data
2994 # and perform custom rendering.
2995 "javaClassValue": "A String", # Contains value if the data is of java class type.
2996 "label": "A String", # An optional label to display in a dax UI for the element.
2997 "boolValue": True or False, # Contains value if the data is of a boolean type.
2998 "strValue": "A String", # Contains value if the data is of string type.
2999 "durationValue": "A String", # Contains value if the data is of duration type.
3000 "int64Value": "A String", # Contains value if the data is of int64 type.
3001 },
3002 ],
3003 "outputCollectionName": [ # User names for all collection outputs to this transform.
3004 "A String",
3005 ],
3006 "id": "A String", # SDK generated id of this transform instance.
3007 },
3008 ],
3009 "displayData": [ # Pipeline level display data.
3010 { # Data provided with a pipeline or transform to provide descriptive info.
3011 "key": "A String", # The key identifying the display data.
3012 # This is intended to be used as a label for the display data
3013 # when viewed in a dax monitoring system.
3014 "shortStrValue": "A String", # A possible additional shorter value to display.
3015 # For example a java_class_name_value of com.mypackage.MyDoFn
3016 # will be stored with MyDoFn as the short_str_value and
3017 # com.mypackage.MyDoFn as the java_class_name value.
3018 # short_str_value can be displayed and java_class_name_value
3019 # will be displayed as a tooltip.
3020 "timestampValue": "A String", # Contains value if the data is of timestamp type.
3021 "url": "A String", # An optional full URL.
3022 "floatValue": 3.14, # Contains value if the data is of float type.
3023 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3024 # language namespace (i.e. python module) which defines the display data.
3025 # This allows a dax monitoring system to specially handle the data
3026 # and perform custom rendering.
3027 "javaClassValue": "A String", # Contains value if the data is of java class type.
3028 "label": "A String", # An optional label to display in a dax UI for the element.
3029 "boolValue": True or False, # Contains value if the data is of a boolean type.
3030 "strValue": "A String", # Contains value if the data is of string type.
3031 "durationValue": "A String", # Contains value if the data is of duration type.
3032 "int64Value": "A String", # Contains value if the data is of int64 type.
3033 },
3034 ],
3035 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3036 { # Description of the composing transforms, names/ids, and input/outputs of a
3037 # stage of execution. Some composing transforms and sources may have been
3038 # generated by the Dataflow service during execution planning.
3039 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3040 { # Description of an interstitial value between transforms in an execution
3041 # stage.
3042 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3043 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3044 # source is most closely associated.
3045 "name": "A String", # Dataflow service generated name for this source.
3046 },
3047 ],
3048 "kind": "A String", # Type of tranform this stage is executing.
3049 "name": "A String", # Dataflow service generated name for this stage.
3050 "outputSource": [ # Output sources for this stage.
3051 { # Description of an input or output of an execution stage.
3052 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3053 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3054 # source is most closely associated.
3055 "name": "A String", # Dataflow service generated name for this source.
3056 "sizeBytes": "A String", # Size of the source, if measurable.
3057 },
3058 ],
3059 "inputSource": [ # Input sources for this stage.
3060 { # Description of an input or output of an execution stage.
3061 "userName": "A String", # Human-readable name for this source; may be user or system generated.
3062 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3063 # source is most closely associated.
3064 "name": "A String", # Dataflow service generated name for this source.
3065 "sizeBytes": "A String", # Size of the source, if measurable.
3066 },
3067 ],
3068 "componentTransform": [ # Transforms that comprise this execution stage.
3069 { # Description of a transform executed as part of an execution stage.
3070 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3071 "originalTransform": "A String", # User name for the original user transform with which this transform is
3072 # most closely associated.
3073 "name": "A String", # Dataflow service generated name for this source.
3074 },
3075 ],
3076 "id": "A String", # Dataflow service generated id for this stage.
3077 },
3078 ],
3079 },
Takashi Matsuo06694102015-09-11 13:55:40 -07003080 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003081 { # Defines a particular step within a Cloud Dataflow job.
3082 #
3083 # A job consists of multiple steps, each of which performs some
3084 # specific operation as part of the overall job. Data is typically
3085 # passed from one step to another as part of the job.
3086 #
3087 # Here's an example of a sequence of steps which together implement a
3088 # Map-Reduce job:
3089 #
3090 # * Read a collection of data from some source, parsing the
3091 # collection's elements.
3092 #
3093 # * Validate the elements.
3094 #
3095 # * Apply a user-defined function to map each element to some value
3096 # and extract an element-specific key value.
3097 #
3098 # * Group elements with the same key into a single element with
3099 # that key, transforming a multiply-keyed collection into a
3100 # uniquely-keyed collection.
3101 #
3102 # * Write the elements out to some data sink.
3103 #
3104 # Note that the Cloud Dataflow service may be used to run many different
3105 # types of jobs, not just Map-Reduce.
3106 "kind": "A String", # The kind of step in the Cloud Dataflow job.
3107 "properties": { # Named properties associated with the step. Each kind of
3108 # predefined step has its own required set of properties.
3109 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07003110 "a_key": "", # Properties of the object.
3111 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003112 "name": "A String", # The name that identifies the step. This must be unique for each
3113 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07003114 },
3115 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003116 "currentState": "A String", # The current state of the job.
3117 #
3118 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3119 # specified.
3120 #
3121 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3122 # terminal state. After a job has reached a terminal state, no
3123 # further state updates may be made.
3124 #
3125 # This field may be mutated by the Cloud Dataflow service;
3126 # callers cannot mutate it.
3127 "tempFiles": [ # A set of files the system should be aware of that are used
3128 # for temporary storage. These temporary files will be
3129 # removed on job completion.
3130 # No duplicates are allowed.
3131 # No file patterns are supported.
3132 #
3133 # The supported files are:
3134 #
3135 # Google Cloud Storage:
3136 #
3137 # storage.googleapis.com/{bucket}/{object}
3138 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08003139 "A String",
3140 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003141 "type": "A String", # The type of Cloud Dataflow job.
3142 "id": "A String", # The unique ID of this job.
3143 #
3144 # This field is set by the Cloud Dataflow service when the Job is
3145 # created, and is immutable for the life of the job.
3146 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3147 # of the job it replaced.
3148 #
3149 # When sending a `CreateJobRequest`, you can update a job by specifying it
3150 # here. The job named here is stopped, and its intermediate state is
3151 # transferred to this job.
3152 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3153 # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
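<p>As a quick orientation, the sketch below shows one way to fetch a job with this client library and read a few of the fields described above. It is an illustrative example rather than part of the generated reference: the project ID and job ID are placeholders, and credentials are assumed to come from the environment (for example, application default credentials).</p>
<pre>
from googleapiclient.discovery import build

# Build a client for the Dataflow API (v1b3).
service = build('dataflow', 'v1b3')

# Request the job with JOB_VIEW_ALL so that `steps` and `pipelineDescription`
# are populated; the default view omits them.
job = service.projects().jobs().get(
    projectId='my-project',   # placeholder project ID
    jobId='my-job-id',        # placeholder job ID
    view='JOB_VIEW_ALL').execute()

# `currentState` is set by the Cloud Dataflow service; callers cannot mutate it.
print(job.get('currentState'))

# Each top-level step has a kind, a unique name, and kind-specific properties.
for step in job.get('steps', []):
    print(step.get('kind'), step.get('name'))
</pre>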
</div>

</body></html>