<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
</p>
<p class="firstline">Returns the messages Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
</p>
<p class="firstline">Returns the workItems Resource.</p>

<p class="toc_element">
  <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
<p class="firstline">Requests the job status.</p>
<p class="toc_element">
  <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
<p class="firstline">Lists the jobs of a project.</p>
<p class="toc_element">
  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
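<p>Below is a brief usage sketch (not part of the generated reference). It assumes the
google-api-python-client discovery flow with application-default credentials; the
project ID and location values are placeholders.</p>
<pre>
from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

# Build the Dataflow service object and navigate to this resource.
credentials = GoogleCredentials.get_application_default()
dataflow = build('dataflow', 'v1b3', credentials=credentials)
jobs = dataflow.projects().locations().jobs()

# List jobs, following pagination with list_next(); the 'jobs' key in the
# response is assumed to hold the Job objects returned by list().
request = jobs.list(projectId='my-project', location='us-central1')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(request, response)
</pre>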
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800105<h3>Method Details</h3>
106<div class="method">
107 <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400108 <pre>Creates a Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800109
110Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400111 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
112 location: string, The location that contains this job. (required)
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800113 body: object, The request body. (required)
114 The object takes the form of:
115
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400116{ # Defines a job to be run by the Cloud Dataflow service.
117 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
118 # If this field is set, the service will ensure its uniqueness.
119 # The request to create a job will fail if the service has knowledge of a
120 # previously submitted job with the same client's ID and job name.
121 # The caller may use this field to ensure idempotence of job
122 # creation across retried attempts to create a job.
123 # By default, the field is empty and, in that case, the service ignores it.
124 "requestedState": "A String", # The job's requested state.
125 #
126 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
127 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
128 # also be used to directly set a job's requested state to
129 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
130 # job if it has not already reached a terminal state.
131 "name": "A String", # The user-specified Cloud Dataflow job name.
132 #
133 # Only one Job with a given name may exist in a project at any
134 # given time. If a caller attempts to create a Job with the same
135 # name as an already-existing Job, the attempt returns the
136 # existing Job.
137 #
138 # The name must match the regular expression
139 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
140 "currentStateTime": "A String", # The timestamp associated with the current state.
141 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
142 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
143 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
144 "labels": { # User-defined labels for this job.
145 #
146 # The labels map can contain no more than 64 entries. Entries of the labels
147 # map are UTF8 strings that comply with the following restrictions:
148 #
149 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
150 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
151 # * Both keys and values are additionally constrained to be <= 128 bytes in
152 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800153 "a_key": "A String",
154 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400155 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
156 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800157 "a_key": "A String",
158 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700159 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
160 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400161 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
162 "version": { # A structure describing which components and their versions of the service
163 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800164 "a_key": "", # Properties of the object.
165 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400166 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
167 # storage. The system will append the suffix "/temp-{JOBNAME} to
168 # this resource prefix, where {JOBNAME} is the value of the
169 # job_name field. The resulting bucket and object prefix is used
170 # as the prefix of the resources used to store temporary data
171 # needed during the job execution. NOTE: This will override the
172 # value in taskrunner_settings.
173 # The supported resource type is:
174 #
175 # Google Cloud Storage:
176 #
177 # storage.googleapis.com/{bucket}/{object}
178 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800179 "internalExperiments": { # Experimental settings.
180 "a_key": "", # Properties of the object. Contains field @type with type URL.
181 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400182 "dataset": "A String", # The dataset for the current project where various workflow
183 # related tables are stored.
184 #
185 # The supported resource type is:
186 #
187 # Google BigQuery:
188 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800189 "experiments": [ # The list of experiments to enable.
190 "A String",
191 ],
192 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400193 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
194 # options are passed through the service and are used to recreate the
195 # SDK pipeline options on the worker in a language agnostic and platform
196 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800197 "a_key": "", # Properties of the object.
198 },
199 "userAgent": { # A description of the process that generated the request.
200 "a_key": "", # Properties of the object.
201 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400202 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
203 # unspecified, the service will attempt to choose a reasonable
204 # default. This should be in the form of the API service name,
205 # e.g. "compute.googleapis.com".
206 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
207 # specified in order for the job to have workers.
208 { # Describes one particular pool of Cloud Dataflow workers to be
209 # instantiated by the Cloud Dataflow service in order to perform the
210 # computations required by a job. Note that a workflow job may use
211 # multiple pools, in order to match the various computational
212 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800213 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400214 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
215 # using the standard Dataflow task runner. Users should ignore
216 # this field.
217 "workflowFileName": "A String", # The file to store the workflow in.
218 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
219 # will not be uploaded.
220 #
221 # The supported resource type is:
222 #
223 # Google Cloud Storage:
224 # storage.googleapis.com/{bucket}/{object}
225 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400226 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
227 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700228 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
229 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
230 "vmId": "A String", # The ID string of the VM.
231 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
232 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400233 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
234 # access the Cloud Dataflow API.
235 "A String",
236 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400237 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
238 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
239 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
240 # "shuffle/v1beta1".
241 "workerId": "A String", # The ID of the worker running this pipeline.
242 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
243 #
244 # When workers access Google Cloud APIs, they logically do so via
245 # relative URLs. If this field is specified, it supplies the base
246 # URL to use for resolving these relative URLs. The normative
247 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
248 # Locators".
249 #
250 # If not specified, the default value is "http://www.googleapis.com/"
251 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
252 # "dataflow/v1b3/projects".
253 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
254 # storage.
255 #
256 # The supported resource type is:
257 #
258 # Google Cloud Storage:
259 #
260 # storage.googleapis.com/{bucket}/{object}
261 # bucket.storage.googleapis.com/{object}
262 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700263 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
264 # taskrunner; e.g. "wheel".
265 "languageHint": "A String", # The suggested backend language.
266 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
267 # console.
268 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
269 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400270 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400271 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
272 #
273 # When workers access Google Cloud APIs, they logically do so via
274 # relative URLs. If this field is specified, it supplies the base
275 # URL to use for resolving these relative URLs. The normative
276 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
277 # Locators".
278 #
279 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700280 "harnessCommand": "A String", # The command to launch the worker harness.
281 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
282 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400283 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700284 # The supported resource type is:
285 #
286 # Google Cloud Storage:
287 # storage.googleapis.com/{bucket}/{object}
288 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800289 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700290 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
291 # are supported.
292 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
293 # service will attempt to choose a reasonable default.
294 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
295 # the service will use the network "default".
296 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
297 # will attempt to choose a reasonable default.
298 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
299 # attempt to choose a reasonable default.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800300 "dataDisks": [ # Data disks that are used by a VM in this workflow.
301 { # Describes the data disk used by a workflow job.
302 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400303 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
304 # attempt to choose a reasonable default.
305 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
306 # must be a disk type appropriate to the project and zone in which
307 # the workers will run. If unknown or unspecified, the service
308 # will attempt to choose a reasonable default.
309 #
310 # For example, the standard persistent disk type is a resource name
311 # typically ending in "pd-standard". If SSD persistent disks are
312 # available, the resource name typically ends with "pd-ssd". The
313 # actual valid values are defined the Google Compute Engine API,
314 # not by the Cloud Dataflow API; consult the Google Compute Engine
315 # documentation for more information about determining the set of
316 # available disk types for a particular project and zone.
317 #
318 # Google Compute Engine Disk types are local to a particular
319 # project in a particular zone, and so the resource name will
320 # typically look something like this:
321 #
322 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800323 },
324 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700325 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
326 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
327 # `TEARDOWN_NEVER`.
328 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
329 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
330 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
331 # down.
332 #
333 # If the workers are not torn down by the service, they will
334 # continue to run and use Google Compute Engine VM resources in the
335 # user's project until they are explicitly terminated by the user.
336 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
337 # policy except for small, manually supervised test jobs.
338 #
339 # If unknown or unspecified, the service will attempt to choose a reasonable
340 # default.
341 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
342 # Compute Engine API.
343 "ipConfiguration": "A String", # Configuration for VM IPs.
344 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
345 # service will choose a number of threads (according to the number of cores
346 # on the selected machine type for batch, or 1 by convention for streaming).
347 "poolArgs": { # Extra arguments for this worker pool.
348 "a_key": "", # Properties of the object. Contains field @type with type URL.
349 },
350 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
351 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400352 # attempt to choose a reasonable default.
353 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
354 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700355 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
356 # the form "regions/REGION/subnetworks/SUBNETWORK".
357 "packages": [ # Packages to be installed on workers.
358 { # The packages that must be installed in order for a worker to run the
359 # steps of the Cloud Dataflow job that will be assigned to its worker
360 # pool.
361 #
362 # This is the mechanism by which the Cloud Dataflow SDK causes code to
363 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
364 # might use this to install jars containing the user's code and all of the
365 # various dependencies (libraries, data files, etc.) required in order
366 # for that code to run.
367 "location": "A String", # The resource to read the package from. The supported resource type is:
368 #
369 # Google Cloud Storage:
370 #
371 # storage.googleapis.com/{bucket}
372 # bucket.storage.googleapis.com/
373 "name": "A String", # The name of the package.
374 },
375 ],
376 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
377 "algorithm": "A String", # The algorithm to use for autoscaling.
378 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
379 },
380 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
381 # select a default set of packages which are useful to worker
382 # harnesses written in a particular language.
383 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
384 # attempt to choose a reasonable default.
385 "metadata": { # Metadata to set on the Google Compute Engine VMs.
386 "a_key": "A String",
387 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800388 },
389 ],
390 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700391 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
392 # of the job it replaced.
393 #
394 # When sending a `CreateJobRequest`, you can update a job by specifying it
395 # here. The job named here is stopped, and its intermediate state is
396 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400397 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
398 # A description of the user pipeline and stages through which it is executed.
399 # Created by Cloud Dataflow service. Only retrieved with
400 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
401 # form. This data is provided by the Dataflow service for ease of visualizing
402 # the pipeline and interpretting Dataflow provided metrics.
403 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
404 { # Description of the type, names/ids, and input/outputs for a transform.
405 "kind": "A String", # Type of transform.
406 "name": "A String", # User provided name for this transform instance.
407 "inputCollectionName": [ # User names for all collection inputs to this transform.
408 "A String",
409 ],
410 "displayData": [ # Transform-specific display data.
411 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400412 "shortStrValue": "A String", # A possible additional shorter value to display.
413 # For example a java_class_name_value of com.mypackage.MyDoFn
414 # will be stored with MyDoFn as the short_str_value and
415 # com.mypackage.MyDoFn as the java_class_name value.
416 # short_str_value can be displayed and java_class_name_value
417 # will be displayed as a tooltip.
418 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700419 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400420 "url": "A String", # An optional full URL.
421 "floatValue": 3.14, # Contains value if the data is of float type.
422 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
423 # language namespace (i.e. python module) which defines the display data.
424 # This allows a dax monitoring system to specially handle the data
425 # and perform custom rendering.
426 "javaClassValue": "A String", # Contains value if the data is of java class type.
427 "label": "A String", # An optional label to display in a dax UI for the element.
428 "boolValue": True or False, # Contains value if the data is of a boolean type.
429 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700430 "key": "A String", # The key identifying the display data.
431 # This is intended to be used as a label for the display data
432 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400433 "int64Value": "A String", # Contains value if the data is of int64 type.
434 },
435 ],
436 "outputCollectionName": [ # User names for all collection outputs to this transform.
437 "A String",
438 ],
439 "id": "A String", # SDK generated id of this transform instance.
440 },
441 ],
442 "displayData": [ # Pipeline level display data.
443 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400444 "shortStrValue": "A String", # A possible additional shorter value to display.
445 # For example a java_class_name_value of com.mypackage.MyDoFn
446 # will be stored with MyDoFn as the short_str_value and
447 # com.mypackage.MyDoFn as the java_class_name value.
448 # short_str_value can be displayed and java_class_name_value
449 # will be displayed as a tooltip.
450 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700451 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400452 "url": "A String", # An optional full URL.
453 "floatValue": 3.14, # Contains value if the data is of float type.
454 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
455 # language namespace (i.e. python module) which defines the display data.
456 # This allows a dax monitoring system to specially handle the data
457 # and perform custom rendering.
458 "javaClassValue": "A String", # Contains value if the data is of java class type.
459 "label": "A String", # An optional label to display in a dax UI for the element.
460 "boolValue": True or False, # Contains value if the data is of a boolean type.
461 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700462 "key": "A String", # The key identifying the display data.
463 # This is intended to be used as a label for the display data
464 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400465 "int64Value": "A String", # Contains value if the data is of int64 type.
466 },
467 ],
468 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
469 { # Description of the composing transforms, names/ids, and input/outputs of a
470 # stage of execution. Some composing transforms and sources may have been
471 # generated by the Dataflow service during execution planning.
472 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
473 { # Description of an interstitial value between transforms in an execution
474 # stage.
475 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
476 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
477 # source is most closely associated.
478 "name": "A String", # Dataflow service generated name for this source.
479 },
480 ],
481 "kind": "A String", # Type of tranform this stage is executing.
482 "name": "A String", # Dataflow service generated name for this stage.
483 "outputSource": [ # Output sources for this stage.
484 { # Description of an input or output of an execution stage.
485 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700486 "sizeBytes": "A String", # Size of the source, if measurable.
487 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400488 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
489 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400490 },
491 ],
492 "inputSource": [ # Input sources for this stage.
493 { # Description of an input or output of an execution stage.
494 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700495 "sizeBytes": "A String", # Size of the source, if measurable.
496 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400497 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400499 },
500 ],
501 "componentTransform": [ # Transforms that comprise this execution stage.
502 { # Description of a transform executed as part of an execution stage.
503 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
504 "originalTransform": "A String", # User name for the original user transform with which this transform is
505 # most closely associated.
506 "name": "A String", # Dataflow service generated name for this source.
507 },
508 ],
509 "id": "A String", # Dataflow service generated id for this stage.
510 },
511 ],
512 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800513 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400514 { # Defines a particular step within a Cloud Dataflow job.
515 #
516 # A job consists of multiple steps, each of which performs some
517 # specific operation as part of the overall job. Data is typically
518 # passed from one step to another as part of the job.
519 #
520 # Here's an example of a sequence of steps which together implement a
521 # Map-Reduce job:
522 #
523 # * Read a collection of data from some source, parsing the
524 # collection's elements.
525 #
526 # * Validate the elements.
527 #
528 # * Apply a user-defined function to map each element to some value
529 # and extract an element-specific key value.
530 #
531 # * Group elements with the same key into a single element with
532 # that key, transforming a multiply-keyed collection into a
533 # uniquely-keyed collection.
534 #
535 # * Write the elements out to some data sink.
536 #
537 # Note that the Cloud Dataflow service may be used to run many different
538 # types of jobs, not just Map-Reduce.
539 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700540 "name": "A String", # The name that identifies the step. This must be unique for each
541 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400542 "properties": { # Named properties associated with the step. Each kind of
543 # predefined step has its own required set of properties.
544 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800545 "a_key": "", # Properties of the object.
546 },
547 },
548 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700549 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400550 "tempFiles": [ # A set of files the system should be aware of that are used
551 # for temporary storage. These temporary files will be
552 # removed on job completion.
553 # No duplicates are allowed.
554 # No file patterns are supported.
555 #
556 # The supported files are:
557 #
558 # Google Cloud Storage:
559 #
560 # storage.googleapis.com/{bucket}/{object}
561 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800562 "A String",
563 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400564 "type": "A String", # The type of Cloud Dataflow job.
565 "id": "A String", # The unique ID of this job.
566 #
567 # This field is set by the Cloud Dataflow service when the Job is
568 # created, and is immutable for the life of the job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700569 "currentState": "A String", # The current state of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400570 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700571 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
572 # specified.
573 #
574 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
575 # terminal state. After a job has reached a terminal state, no
576 # further state updates may be made.
577 #
578 # This field may be mutated by the Cloud Dataflow service;
579 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400580 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
581 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800582 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400583 "a_key": { # Contains information about how a particular
584 # google.dataflow.v1beta3.Step will be executed.
585 "stepName": [ # The steps associated with the execution stage.
586 # Note that stages may have several steps, and that a given step
587 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800588 "A String",
589 ],
590 },
591 },
592 },
593 }
594
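  For illustration only, a minimal body sketch with hypothetical values (most
  fields above are optional and defaulted by the service; "JOB_TYPE_BATCH" is
  assumed here to be an accepted job type):

    body = {
      "name": "example-batch-job",
      "type": "JOB_TYPE_BATCH",
      "environment": {
        "tempStoragePrefix": "storage.googleapis.com/example-bucket/temp",
      },
    }
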
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  replaceJobId: string, Deprecated. This field is now in the Job message.
  view: string, The level of information requested in response.

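  Example call (a sketch; assumes `jobs` is the projects().locations().jobs()
  resource of a built service, as in the snippet above, and `body` is a Job
  dict like the sketch in Args):

    job = jobs.create(projectId='my-project', location='us-central1',
                      body=body).execute()
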
602Returns:
603 An object of the form:
604
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400605 { # Defines a job to be run by the Cloud Dataflow service.
606 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
607 # If this field is set, the service will ensure its uniqueness.
608 # The request to create a job will fail if the service has knowledge of a
609 # previously submitted job with the same client's ID and job name.
610 # The caller may use this field to ensure idempotence of job
611 # creation across retried attempts to create a job.
612 # By default, the field is empty and, in that case, the service ignores it.
613 "requestedState": "A String", # The job's requested state.
614 #
615 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
616 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
617 # also be used to directly set a job's requested state to
618 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
619 # job if it has not already reached a terminal state.
620 "name": "A String", # The user-specified Cloud Dataflow job name.
621 #
622 # Only one Job with a given name may exist in a project at any
623 # given time. If a caller attempts to create a Job with the same
624 # name as an already-existing Job, the attempt returns the
625 # existing Job.
626 #
627 # The name must match the regular expression
628 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
629 "currentStateTime": "A String", # The timestamp associated with the current state.
630 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
631 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
632 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
633 "labels": { # User-defined labels for this job.
634 #
635 # The labels map can contain no more than 64 entries. Entries of the labels
636 # map are UTF8 strings that comply with the following restrictions:
637 #
638 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
639 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
640 # * Both keys and values are additionally constrained to be <= 128 bytes in
641 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800642 "a_key": "A String",
643 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400644 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
645 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800646 "a_key": "A String",
647 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700648 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
649 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400650 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
651 "version": { # A structure describing which components and their versions of the service
652 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800653 "a_key": "", # Properties of the object.
654 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400655 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
656 # storage. The system will append the suffix "/temp-{JOBNAME} to
657 # this resource prefix, where {JOBNAME} is the value of the
658 # job_name field. The resulting bucket and object prefix is used
659 # as the prefix of the resources used to store temporary data
660 # needed during the job execution. NOTE: This will override the
661 # value in taskrunner_settings.
662 # The supported resource type is:
663 #
664 # Google Cloud Storage:
665 #
666 # storage.googleapis.com/{bucket}/{object}
667 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800668 "internalExperiments": { # Experimental settings.
669 "a_key": "", # Properties of the object. Contains field @type with type URL.
670 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400671 "dataset": "A String", # The dataset for the current project where various workflow
672 # related tables are stored.
673 #
674 # The supported resource type is:
675 #
676 # Google BigQuery:
677 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800678 "experiments": [ # The list of experiments to enable.
679 "A String",
680 ],
681 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400682 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
683 # options are passed through the service and are used to recreate the
684 # SDK pipeline options on the worker in a language agnostic and platform
685 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800686 "a_key": "", # Properties of the object.
687 },
688 "userAgent": { # A description of the process that generated the request.
689 "a_key": "", # Properties of the object.
690 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400691 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
692 # unspecified, the service will attempt to choose a reasonable
693 # default. This should be in the form of the API service name,
694 # e.g. "compute.googleapis.com".
695 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
696 # specified in order for the job to have workers.
697 { # Describes one particular pool of Cloud Dataflow workers to be
698 # instantiated by the Cloud Dataflow service in order to perform the
699 # computations required by a job. Note that a workflow job may use
700 # multiple pools, in order to match the various computational
701 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800702 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400703 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
704 # using the standard Dataflow task runner. Users should ignore
705 # this field.
706 "workflowFileName": "A String", # The file to store the workflow in.
707 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
708 # will not be uploaded.
709 #
710 # The supported resource type is:
711 #
712 # Google Cloud Storage:
713 # storage.googleapis.com/{bucket}/{object}
714 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400715 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
716 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700717 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
718 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
719 "vmId": "A String", # The ID string of the VM.
720 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
721 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400722 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
723 # access the Cloud Dataflow API.
724 "A String",
725 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400726 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
727 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
728 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
729 # "shuffle/v1beta1".
730 "workerId": "A String", # The ID of the worker running this pipeline.
731 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
732 #
733 # When workers access Google Cloud APIs, they logically do so via
734 # relative URLs. If this field is specified, it supplies the base
735 # URL to use for resolving these relative URLs. The normative
736 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
737 # Locators".
738 #
739 # If not specified, the default value is "http://www.googleapis.com/"
740 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
741 # "dataflow/v1b3/projects".
742 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
743 # storage.
744 #
745 # The supported resource type is:
746 #
747 # Google Cloud Storage:
748 #
749 # storage.googleapis.com/{bucket}/{object}
750 # bucket.storage.googleapis.com/{object}
751 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700752 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
753 # taskrunner; e.g. "wheel".
754 "languageHint": "A String", # The suggested backend language.
755 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
756 # console.
757 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
758 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400759 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400760 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
761 #
762 # When workers access Google Cloud APIs, they logically do so via
763 # relative URLs. If this field is specified, it supplies the base
764 # URL to use for resolving these relative URLs. The normative
765 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
766 # Locators".
767 #
768 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700769 "harnessCommand": "A String", # The command to launch the worker harness.
770 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
771 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400772 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700773 # The supported resource type is:
774 #
775 # Google Cloud Storage:
776 # storage.googleapis.com/{bucket}/{object}
777 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800778 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700779 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
780 # are supported.
781 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
782 # service will attempt to choose a reasonable default.
783 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
784 # the service will use the network "default".
785 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
786 # will attempt to choose a reasonable default.
787 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
788 # attempt to choose a reasonable default.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800789 "dataDisks": [ # Data disks that are used by a VM in this workflow.
790 { # Describes the data disk used by a workflow job.
791 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400792 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
793 # attempt to choose a reasonable default.
794 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
795 # must be a disk type appropriate to the project and zone in which
796 # the workers will run. If unknown or unspecified, the service
797 # will attempt to choose a reasonable default.
798 #
799 # For example, the standard persistent disk type is a resource name
800 # typically ending in "pd-standard". If SSD persistent disks are
801 # available, the resource name typically ends with "pd-ssd". The
802 # actual valid values are defined the Google Compute Engine API,
803 # not by the Cloud Dataflow API; consult the Google Compute Engine
804 # documentation for more information about determining the set of
805 # available disk types for a particular project and zone.
806 #
807 # Google Compute Engine Disk types are local to a particular
808 # project in a particular zone, and so the resource name will
809 # typically look something like this:
810 #
811 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800812 },
813 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700814 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
815 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
816 # `TEARDOWN_NEVER`.
817 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
818 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
819 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
820 # down.
821 #
822 # If the workers are not torn down by the service, they will
823 # continue to run and use Google Compute Engine VM resources in the
824 # user's project until they are explicitly terminated by the user.
825 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
826 # policy except for small, manually supervised test jobs.
827 #
828 # If unknown or unspecified, the service will attempt to choose a reasonable
829 # default.
830 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
831 # Compute Engine API.
832 "ipConfiguration": "A String", # Configuration for VM IPs.
833 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
834 # service will choose a number of threads (according to the number of cores
835 # on the selected machine type for batch, or 1 by convention for streaming).
836 "poolArgs": { # Extra arguments for this worker pool.
837 "a_key": "", # Properties of the object. Contains field @type with type URL.
838 },
839 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
840 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400841 # attempt to choose a reasonable default.
842 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
843 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700844 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
845 # the form "regions/REGION/subnetworks/SUBNETWORK".
846 "packages": [ # Packages to be installed on workers.
847 { # The packages that must be installed in order for a worker to run the
848 # steps of the Cloud Dataflow job that will be assigned to its worker
849 # pool.
850 #
851 # This is the mechanism by which the Cloud Dataflow SDK causes code to
852 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
853 # might use this to install jars containing the user's code and all of the
854 # various dependencies (libraries, data files, etc.) required in order
855 # for that code to run.
856 "location": "A String", # The resource to read the package from. The supported resource type is:
857 #
858 # Google Cloud Storage:
859 #
860 # storage.googleapis.com/{bucket}
861 # bucket.storage.googleapis.com/
862 "name": "A String", # The name of the package.
863 },
864 ],
865 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
866 "algorithm": "A String", # The algorithm to use for autoscaling.
867 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
868 },
869 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
870 # select a default set of packages which are useful to worker
871 # harnesses written in a particular language.
872 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
873 # attempt to choose a reasonable default.
874 "metadata": { # Metadata to set on the Google Compute Engine VMs.
875 "a_key": "A String",
876 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800877 },
878 ],
879 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700880 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
881 # of the job it replaced.
882 #
883 # When sending a `CreateJobRequest`, you can update a job by specifying it
884 # here. The job named here is stopped, and its intermediate state is
885 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400886 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
887 # A description of the user pipeline and stages through which it is executed.
888 # Created by Cloud Dataflow service. Only retrieved with
889 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
890 # form. This data is provided by the Dataflow service for ease of visualizing
891 # the pipeline and interpretting Dataflow provided metrics.
892 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
893 { # Description of the type, names/ids, and input/outputs for a transform.
894 "kind": "A String", # Type of transform.
895 "name": "A String", # User provided name for this transform instance.
896 "inputCollectionName": [ # User names for all collection inputs to this transform.
897 "A String",
898 ],
899 "displayData": [ # Transform-specific display data.
900 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400901 "shortStrValue": "A String", # A possible additional shorter value to display.
902 # For example a java_class_name_value of com.mypackage.MyDoFn
903 # will be stored with MyDoFn as the short_str_value and
904 # com.mypackage.MyDoFn as the java_class_name value.
905 # short_str_value can be displayed and java_class_name_value
906 # will be displayed as a tooltip.
907 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700908 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400909 "url": "A String", # An optional full URL.
910 "floatValue": 3.14, # Contains value if the data is of float type.
911 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
912 # language namespace (i.e. python module) which defines the display data.
913 # This allows a dax monitoring system to specially handle the data
914 # and perform custom rendering.
915 "javaClassValue": "A String", # Contains value if the data is of java class type.
916 "label": "A String", # An optional label to display in a dax UI for the element.
917 "boolValue": True or False, # Contains value if the data is of a boolean type.
918 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700919 "key": "A String", # The key identifying the display data.
920 # This is intended to be used as a label for the display data
921 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400922 "int64Value": "A String", # Contains value if the data is of int64 type.
923 },
924 ],
925 "outputCollectionName": [ # User names for all collection outputs to this transform.
926 "A String",
927 ],
928 "id": "A String", # SDK generated id of this transform instance.
929 },
930 ],
931 "displayData": [ # Pipeline level display data.
932 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400933 "shortStrValue": "A String", # A possible additional shorter value to display.
934 # For example a java_class_name_value of com.mypackage.MyDoFn
935 # will be stored with MyDoFn as the short_str_value and
936 # com.mypackage.MyDoFn as the java_class_name value.
937 # short_str_value can be displayed and java_class_name_value
938 # will be displayed as a tooltip.
939 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700940 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400941 "url": "A String", # An optional full URL.
942 "floatValue": 3.14, # Contains value if the data is of float type.
943 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
944 # language namespace (i.e. python module) which defines the display data.
945 # This allows a dax monitoring system to specially handle the data
946 # and perform custom rendering.
947 "javaClassValue": "A String", # Contains value if the data is of java class type.
948 "label": "A String", # An optional label to display in a dax UI for the element.
949 "boolValue": True or False, # Contains value if the data is of a boolean type.
950 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700951 "key": "A String", # The key identifying the display data.
952 # This is intended to be used as a label for the display data
953 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400954 "int64Value": "A String", # Contains value if the data is of int64 type.
955 },
956 ],
957 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
958 { # Description of the composing transforms, names/ids, and input/outputs of a
959 # stage of execution. Some composing transforms and sources may have been
960 # generated by the Dataflow service during execution planning.
961 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
962 { # Description of an interstitial value between transforms in an execution
963 # stage.
964 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
965 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
966 # source is most closely associated.
967 "name": "A String", # Dataflow service generated name for this source.
968 },
969 ],
970 "kind": "A String", # Type of tranform this stage is executing.
971 "name": "A String", # Dataflow service generated name for this stage.
972 "outputSource": [ # Output sources for this stage.
973 { # Description of an input or output of an execution stage.
974 "userName": "A String", # Human-readable name for this source; may be user or system generated.
975               "sizeBytes": "A String", # Size of the source, if measurable.
976               "name": "A String", # Dataflow service generated name for this source.
977               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
978                   # source is most closely associated.
979             },
980 ],
981 "inputSource": [ # Input sources for this stage.
982 { # Description of an input or output of an execution stage.
983 "userName": "A String", # Human-readable name for this source; may be user or system generated.
984               "sizeBytes": "A String", # Size of the source, if measurable.
985               "name": "A String", # Dataflow service generated name for this source.
986               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
987                   # source is most closely associated.
988             },
989 ],
990 "componentTransform": [ # Transforms that comprise this execution stage.
991 { # Description of a transform executed as part of an execution stage.
992 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
993 "originalTransform": "A String", # User name for the original user transform with which this transform is
994 # most closely associated.
995 "name": "A String", # Dataflow service generated name for this source.
996 },
997 ],
998 "id": "A String", # Dataflow service generated id for this stage.
999 },
1000 ],
1001 },
1002   "steps": [ # The top-level steps that constitute the entire job.
1003     { # Defines a particular step within a Cloud Dataflow job.
1004 #
1005 # A job consists of multiple steps, each of which performs some
1006 # specific operation as part of the overall job. Data is typically
1007 # passed from one step to another as part of the job.
1008 #
1009 # Here's an example of a sequence of steps which together implement a
1010 # Map-Reduce job:
1011 #
1012 # * Read a collection of data from some source, parsing the
1013 # collection's elements.
1014 #
1015 # * Validate the elements.
1016 #
1017 # * Apply a user-defined function to map each element to some value
1018 # and extract an element-specific key value.
1019 #
1020 # * Group elements with the same key into a single element with
1021 # that key, transforming a multiply-keyed collection into a
1022 # uniquely-keyed collection.
1023 #
1024 # * Write the elements out to some data sink.
1025 #
1026 # Note that the Cloud Dataflow service may be used to run many different
1027 # types of jobs, not just Map-Reduce.
1028 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1029       "name": "A String", # The name that identifies the step. This must be unique for each
1030           # step with respect to all other steps in the Cloud Dataflow job.
1031       "properties": { # Named properties associated with the step. Each kind of
1032 # predefined step has its own required set of properties.
1033 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1034         "a_key": "", # Properties of the object.
1035 },
1036 },
1037 ],
1038   "location": "A String", # The location that contains this job.
1039   "tempFiles": [ # A set of files the system should be aware of that are used
1040 # for temporary storage. These temporary files will be
1041 # removed on job completion.
1042 # No duplicates are allowed.
1043 # No file patterns are supported.
1044 #
1045 # The supported files are:
1046 #
1047 # Google Cloud Storage:
1048 #
1049 # storage.googleapis.com/{bucket}/{object}
1050 # bucket.storage.googleapis.com/{object}
1051     "A String",
1052   ],
1053   "type": "A String", # The type of Cloud Dataflow job.
1054 "id": "A String", # The unique ID of this job.
1055 #
1056 # This field is set by the Cloud Dataflow service when the Job is
1057 # created, and is immutable for the life of the job.
1058   "currentState": "A String", # The current state of the job.
1059       #
1060       # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1061 # specified.
1062 #
1063 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1064 # terminal state. After a job has reached a terminal state, no
1065 # further state updates may be made.
1066 #
1067 # This field may be mutated by the Cloud Dataflow service;
1068 # callers cannot mutate it.
1069   "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1070       # isn't contained in the submitted job.
1071     "stages": { # A mapping from each stage to the information about that stage.
1072       "a_key": { # Contains information about how a particular
1073 # google.dataflow.v1beta3.Step will be executed.
1074 "stepName": [ # The steps associated with the execution stage.
1075 # Note that stages may have several steps, and that a given step
1076 # might be run by more than one stage.
1077             "A String",
1078 ],
1079 },
1080 },
1081 },
1082 }</pre>
1083</div>
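<p>As a usage illustration, the following is a minimal sketch of calling <code>create</code> through the Python client library. It assumes <code>google-api-python-client</code> is installed and Application Default Credentials are available; the project ID, location, and job body shown are hypothetical placeholders, and real job bodies are normally produced by a Dataflow SDK runner rather than written by hand.</p>
<pre>
from googleapiclient.discovery import build

# Build a Dataflow API client; credentials are resolved from the environment.
service = build('dataflow', 'v1b3')

# Hypothetical minimal job body; see the request body schema above.
job_body = {
    'name': 'example-batch-job',  # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',
}

response = service.projects().locations().jobs().create(
    projectId='my-project',    # placeholder project ID
    location='us-central1',    # placeholder location
    body=job_body,
).execute()
print(response['id'])  # service-assigned, immutable job ID
</pre>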
1084
1085<div class="method">
1086 <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
1087  <pre>Gets the state of the specified Cloud Dataflow job.
1088
1089Args:
1090  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1091 location: string, The location that contains this job. (required)
1092 jobId: string, The job ID. (required)
1093  x__xgafv: string, V1 error format.
1094    Allowed values
1095 1 - v1 error format
1096 2 - v2 error format
1097 view: string, The level of information requested in response.
1098
1099Returns:
1100 An object of the form:
1101
1102     { # Defines a job to be run by the Cloud Dataflow service.
1103 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1104 # If this field is set, the service will ensure its uniqueness.
1105 # The request to create a job will fail if the service has knowledge of a
1106 # previously submitted job with the same client's ID and job name.
1107 # The caller may use this field to ensure idempotence of job
1108 # creation across retried attempts to create a job.
1109 # By default, the field is empty and, in that case, the service ignores it.
1110 "requestedState": "A String", # The job's requested state.
1111 #
1112 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1113 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1114 # also be used to directly set a job's requested state to
1115 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1116 # job if it has not already reached a terminal state.
1117 "name": "A String", # The user-specified Cloud Dataflow job name.
1118 #
1119 # Only one Job with a given name may exist in a project at any
1120 # given time. If a caller attempts to create a Job with the same
1121 # name as an already-existing Job, the attempt returns the
1122 # existing Job.
1123 #
1124 # The name must match the regular expression
1125 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1126 "currentStateTime": "A String", # The timestamp associated with the current state.
1127 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1128 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1129 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1130 "labels": { # User-defined labels for this job.
1131 #
1132 # The labels map can contain no more than 64 entries. Entries of the labels
1133 # map are UTF8 strings that comply with the following restrictions:
1134 #
1135 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1136 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1137 # * Both keys and values are additionally constrained to be <= 128 bytes in
1138 # size.
1139     "a_key": "A String",
1140   },
1141   "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1142       # corresponding name prefixes of the new job.
1143     "a_key": "A String",
1144 },
1145   "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1146       # Cloud Dataflow service.
1147   "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1148 "version": { # A structure describing which components and their versions of the service
1149 # are required in order to run the job.
1150       "a_key": "", # Properties of the object.
1151     },
1152     "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1153         # storage. The system will append the suffix "/temp-{JOBNAME}" to
1154 # this resource prefix, where {JOBNAME} is the value of the
1155 # job_name field. The resulting bucket and object prefix is used
1156 # as the prefix of the resources used to store temporary data
1157 # needed during the job execution. NOTE: This will override the
1158 # value in taskrunner_settings.
1159 # The supported resource type is:
1160 #
1161 # Google Cloud Storage:
1162 #
1163 # storage.googleapis.com/{bucket}/{object}
1164 # bucket.storage.googleapis.com/{object}
1165     "internalExperiments": { # Experimental settings.
1166       "a_key": "", # Properties of the object. Contains field @type with type URL.
1167     },
1168     "dataset": "A String", # The dataset for the current project where various workflow
1169 # related tables are stored.
1170 #
1171 # The supported resource type is:
1172 #
1173 # Google BigQuery:
1174 # bigquery.googleapis.com/{dataset}
1175     "experiments": [ # The list of experiments to enable.
1176 "A String",
1177 ],
1178 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1179     "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1180 # options are passed through the service and are used to recreate the
1181 # SDK pipeline options on the worker in a language agnostic and platform
1182 # independent way.
1183       "a_key": "", # Properties of the object.
1184 },
1185 "userAgent": { # A description of the process that generated the request.
1186 "a_key": "", # Properties of the object.
1187 },
1188     "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1189 # unspecified, the service will attempt to choose a reasonable
1190 # default. This should be in the form of the API service name,
1191 # e.g. "compute.googleapis.com".
1192 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1193 # specified in order for the job to have workers.
1194 { # Describes one particular pool of Cloud Dataflow workers to be
1195 # instantiated by the Cloud Dataflow service in order to perform the
1196 # computations required by a job. Note that a workflow job may use
1197 # multiple pools, in order to match the various computational
1198 # requirements of the various stages of the job.
1199         "diskSourceImage": "A String", # Fully qualified source image for disks.
1200         "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1201 # using the standard Dataflow task runner. Users should ignore
1202 # this field.
1203 "workflowFileName": "A String", # The file to store the workflow in.
1204 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1205 # will not be uploaded.
1206 #
1207 # The supported resource type is:
1208 #
1209 # Google Cloud Storage:
1210 # storage.googleapis.com/{bucket}/{object}
1211 # bucket.storage.googleapis.com/{object}
1212           "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1213               # taskrunner; e.g. "root".
1214           "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1215 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1216 "vmId": "A String", # The ID string of the VM.
1217 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1218 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1219           "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1220 # access the Cloud Dataflow API.
1221 "A String",
1222 ],
1223           "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1224 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1225 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1226 # "shuffle/v1beta1".
1227 "workerId": "A String", # The ID of the worker running this pipeline.
1228 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1229 #
1230 # When workers access Google Cloud APIs, they logically do so via
1231 # relative URLs. If this field is specified, it supplies the base
1232 # URL to use for resolving these relative URLs. The normative
1233 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1234 # Locators".
1235 #
1236 # If not specified, the default value is "http://www.googleapis.com/"
1237 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1238 # "dataflow/v1b3/projects".
1239 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1240 # storage.
1241 #
1242 # The supported resource type is:
1243 #
1244 # Google Cloud Storage:
1245 #
1246 # storage.googleapis.com/{bucket}/{object}
1247 # bucket.storage.googleapis.com/{object}
1248 },
1249           "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1250 # taskrunner; e.g. "wheel".
1251 "languageHint": "A String", # The suggested backend language.
1252 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1253 # console.
1254 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1255 "logDir": "A String", # The directory on the VM to store logs.
1256           "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1257           "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1258 #
1259 # When workers access Google Cloud APIs, they logically do so via
1260 # relative URLs. If this field is specified, it supplies the base
1261 # URL to use for resolving these relative URLs. The normative
1262 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1263 # Locators".
1264 #
1265 # If not specified, the default value is "http://www.googleapis.com/"
1266           "harnessCommand": "A String", # The command to launch the worker harness.
1267 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1268 # temporary storage.
1269               #
1270               # The supported resource type is:
1271 #
1272 # Google Cloud Storage:
1273 # storage.googleapis.com/{bucket}/{object}
1274 # bucket.storage.googleapis.com/{object}
1275         },
1276         "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1277 # are supported.
1278 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1279 # service will attempt to choose a reasonable default.
1280 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1281 # the service will use the network "default".
1282 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1283 # will attempt to choose a reasonable default.
1284 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1285 # attempt to choose a reasonable default.
1286         "dataDisks": [ # Data disks that are used by a VM in this workflow.
1287 { # Describes the data disk used by a workflow job.
1288 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1289             "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1290 # attempt to choose a reasonable default.
1291 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1292 # must be a disk type appropriate to the project and zone in which
1293 # the workers will run. If unknown or unspecified, the service
1294 # will attempt to choose a reasonable default.
1295 #
1296 # For example, the standard persistent disk type is a resource name
1297 # typically ending in "pd-standard". If SSD persistent disks are
1298 # available, the resource name typically ends with "pd-ssd". The
1299                 # actual valid values are defined by the Google Compute Engine API,
1300 # not by the Cloud Dataflow API; consult the Google Compute Engine
1301 # documentation for more information about determining the set of
1302 # available disk types for a particular project and zone.
1303 #
1304 # Google Compute Engine Disk types are local to a particular
1305 # project in a particular zone, and so the resource name will
1306 # typically look something like this:
1307 #
1308 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1309           },
1310         ],
1311         "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1312 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1313 # `TEARDOWN_NEVER`.
1314 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1315 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1316 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1317 # down.
1318 #
1319 # If the workers are not torn down by the service, they will
1320 # continue to run and use Google Compute Engine VM resources in the
1321 # user's project until they are explicitly terminated by the user.
1322 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1323 # policy except for small, manually supervised test jobs.
1324 #
1325 # If unknown or unspecified, the service will attempt to choose a reasonable
1326 # default.
1327 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1328 # Compute Engine API.
1329 "ipConfiguration": "A String", # Configuration for VM IPs.
1330 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1331 # service will choose a number of threads (according to the number of cores
1332 # on the selected machine type for batch, or 1 by convention for streaming).
1333 "poolArgs": { # Extra arguments for this worker pool.
1334 "a_key": "", # Properties of the object. Contains field @type with type URL.
1335 },
1336 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1337 # execute the job. If zero or unspecified, the service will
1338             # attempt to choose a reasonable default.
1339         "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1340             # harness, residing in Google Container Registry.
1341         "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1342 # the form "regions/REGION/subnetworks/SUBNETWORK".
1343 "packages": [ # Packages to be installed on workers.
1344 { # The packages that must be installed in order for a worker to run the
1345 # steps of the Cloud Dataflow job that will be assigned to its worker
1346 # pool.
1347 #
1348 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1349 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1350 # might use this to install jars containing the user's code and all of the
1351 # various dependencies (libraries, data files, etc.) required in order
1352 # for that code to run.
1353 "location": "A String", # The resource to read the package from. The supported resource type is:
1354 #
1355 # Google Cloud Storage:
1356 #
1357 # storage.googleapis.com/{bucket}
1358 # bucket.storage.googleapis.com/
1359 "name": "A String", # The name of the package.
1360 },
1361 ],
1362 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1363 "algorithm": "A String", # The algorithm to use for autoscaling.
1364 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1365 },
1366 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1367 # select a default set of packages which are useful to worker
1368 # harnesses written in a particular language.
1369 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1370 # attempt to choose a reasonable default.
1371 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1372 "a_key": "A String",
1373 },
1374       },
1375     ],
1376   },
1377   "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1378 # of the job it replaced.
1379 #
1380 # When sending a `CreateJobRequest`, you can update a job by specifying it
1381 # here. The job named here is stopped, and its intermediate state is
1382 # transferred to this job.
1383   "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
1384       # A descriptive representation of the submitted pipeline as well as the executed
1385       # form. This data is provided by the Dataflow service for ease of visualizing
1386       # the pipeline and interpreting Dataflow-provided metrics.
1387       # A description of the user pipeline and stages through which it is executed.
1388       # Created by the Cloud Dataflow service. Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1389 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1390 { # Description of the type, names/ids, and input/outputs for a transform.
1391 "kind": "A String", # Type of transform.
1392 "name": "A String", # User provided name for this transform instance.
1393 "inputCollectionName": [ # User names for all collection inputs to this transform.
1394 "A String",
1395 ],
1396 "displayData": [ # Transform-specific display data.
1397 { # Data provided with a pipeline or transform to provide descriptive info.
1398             "shortStrValue": "A String", # A possible additional shorter value to display.
1399 # For example a java_class_name_value of com.mypackage.MyDoFn
1400 # will be stored with MyDoFn as the short_str_value and
1401 # com.mypackage.MyDoFn as the java_class_name value.
1402 # short_str_value can be displayed and java_class_name_value
1403 # will be displayed as a tooltip.
1404 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1405             "durationValue": "A String", # Contains value if the data is of duration type.
1406             "url": "A String", # An optional full URL.
1407 "floatValue": 3.14, # Contains value if the data is of float type.
1408 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1409 # language namespace (i.e. python module) which defines the display data.
1410 # This allows a dax monitoring system to specially handle the data
1411 # and perform custom rendering.
1412 "javaClassValue": "A String", # Contains value if the data is of java class type.
1413 "label": "A String", # An optional label to display in a dax UI for the element.
1414 "boolValue": True or False, # Contains value if the data is of a boolean type.
1415 "strValue": "A String", # Contains value if the data is of string type.
1416             "key": "A String", # The key identifying the display data.
1417                 # This is intended to be used as a label for the display data
1418                 # when viewed in a dax monitoring system.
1419             "int64Value": "A String", # Contains value if the data is of int64 type.
1420 },
1421 ],
1422 "outputCollectionName": [ # User names for all collection outputs to this transform.
1423 "A String",
1424 ],
1425 "id": "A String", # SDK generated id of this transform instance.
1426 },
1427 ],
1428 "displayData": [ # Pipeline level display data.
1429 { # Data provided with a pipeline or transform to provide descriptive info.
1430         "shortStrValue": "A String", # A possible additional shorter value to display.
1431 # For example a java_class_name_value of com.mypackage.MyDoFn
1432 # will be stored with MyDoFn as the short_str_value and
1433 # com.mypackage.MyDoFn as the java_class_name value.
1434 # short_str_value can be displayed and java_class_name_value
1435 # will be displayed as a tooltip.
1436 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1437         "durationValue": "A String", # Contains value if the data is of duration type.
1438         "url": "A String", # An optional full URL.
1439 "floatValue": 3.14, # Contains value if the data is of float type.
1440 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1441 # language namespace (i.e. python module) which defines the display data.
1442 # This allows a dax monitoring system to specially handle the data
1443 # and perform custom rendering.
1444 "javaClassValue": "A String", # Contains value if the data is of java class type.
1445 "label": "A String", # An optional label to display in a dax UI for the element.
1446 "boolValue": True or False, # Contains value if the data is of a boolean type.
1447 "strValue": "A String", # Contains value if the data is of string type.
1448         "key": "A String", # The key identifying the display data.
1449             # This is intended to be used as a label for the display data
1450             # when viewed in a dax monitoring system.
1451         "int64Value": "A String", # Contains value if the data is of int64 type.
1452 },
1453 ],
1454 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1455 { # Description of the composing transforms, names/ids, and input/outputs of a
1456 # stage of execution. Some composing transforms and sources may have been
1457 # generated by the Dataflow service during execution planning.
1458 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1459 { # Description of an interstitial value between transforms in an execution
1460 # stage.
1461 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1462 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1463 # source is most closely associated.
1464 "name": "A String", # Dataflow service generated name for this source.
1465 },
1466 ],
1467         "kind": "A String", # Type of transform this stage is executing.
1468 "name": "A String", # Dataflow service generated name for this stage.
1469 "outputSource": [ # Output sources for this stage.
1470 { # Description of an input or output of an execution stage.
1471 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1472             "sizeBytes": "A String", # Size of the source, if measurable.
1473             "name": "A String", # Dataflow service generated name for this source.
1474             "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1475                 # source is most closely associated.
1476           },
1477 ],
1478 "inputSource": [ # Input sources for this stage.
1479 { # Description of an input or output of an execution stage.
1480 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1481             "sizeBytes": "A String", # Size of the source, if measurable.
1482             "name": "A String", # Dataflow service generated name for this source.
1483             "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1484                 # source is most closely associated.
1485           },
1486 ],
1487 "componentTransform": [ # Transforms that comprise this execution stage.
1488 { # Description of a transform executed as part of an execution stage.
1489 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1490 "originalTransform": "A String", # User name for the original user transform with which this transform is
1491 # most closely associated.
1492 "name": "A String", # Dataflow service generated name for this source.
1493 },
1494 ],
1495 "id": "A String", # Dataflow service generated id for this stage.
1496 },
1497 ],
1498 },
1499   "steps": [ # The top-level steps that constitute the entire job.
1500     { # Defines a particular step within a Cloud Dataflow job.
1501 #
1502 # A job consists of multiple steps, each of which performs some
1503 # specific operation as part of the overall job. Data is typically
1504 # passed from one step to another as part of the job.
1505 #
1506 # Here's an example of a sequence of steps which together implement a
1507 # Map-Reduce job:
1508 #
1509 # * Read a collection of data from some source, parsing the
1510 # collection's elements.
1511 #
1512 # * Validate the elements.
1513 #
1514 # * Apply a user-defined function to map each element to some value
1515 # and extract an element-specific key value.
1516 #
1517 # * Group elements with the same key into a single element with
1518 # that key, transforming a multiply-keyed collection into a
1519 # uniquely-keyed collection.
1520 #
1521 # * Write the elements out to some data sink.
1522 #
1523 # Note that the Cloud Dataflow service may be used to run many different
1524 # types of jobs, not just Map-Reduce.
1525 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1526       "name": "A String", # The name that identifies the step. This must be unique for each
1527           # step with respect to all other steps in the Cloud Dataflow job.
1528       "properties": { # Named properties associated with the step. Each kind of
1529 # predefined step has its own required set of properties.
1530 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1531         "a_key": "", # Properties of the object.
1532 },
1533 },
1534 ],
1535   "location": "A String", # The location that contains this job.
1536   "tempFiles": [ # A set of files the system should be aware of that are used
1537 # for temporary storage. These temporary files will be
1538 # removed on job completion.
1539 # No duplicates are allowed.
1540 # No file patterns are supported.
1541 #
1542 # The supported files are:
1543 #
1544 # Google Cloud Storage:
1545 #
1546 # storage.googleapis.com/{bucket}/{object}
1547 # bucket.storage.googleapis.com/{object}
1548     "A String",
1549   ],
1550   "type": "A String", # The type of Cloud Dataflow job.
1551 "id": "A String", # The unique ID of this job.
1552 #
1553 # This field is set by the Cloud Dataflow service when the Job is
1554 # created, and is immutable for the life of the job.
1555   "currentState": "A String", # The current state of the job.
1556       #
1557       # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1558 # specified.
1559 #
1560 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1561 # terminal state. After a job has reached a terminal state, no
1562 # further state updates may be made.
1563 #
1564 # This field may be mutated by the Cloud Dataflow service;
1565 # callers cannot mutate it.
1566   "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1567       # isn't contained in the submitted job.
1568     "stages": { # A mapping from each stage to the information about that stage.
1569       "a_key": { # Contains information about how a particular
1570 # google.dataflow.v1beta3.Step will be executed.
1571 "stepName": [ # The steps associated with the execution stage.
1572 # Note that stages may have several steps, and that a given step
1573 # might be run by more than one stage.
1574             "A String",
1575 ],
1576 },
1577 },
1578 },
1579 }</pre>
1580</div>
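<p>A minimal sketch of polling a job's state with <code>get</code>, under the same assumptions as the <code>create</code> example above; the IDs shown are placeholders.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

# Fetch the job; currentState is set by the service and may lag slightly.
job = service.projects().locations().jobs().get(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-03-01_00_00_00-1234567890123456789',  # placeholder job ID
).execute()
print(job.get('currentState'))
</pre>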
1581
1582<div class="method">
1583 <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1584 <pre>Request the job status.
1585
1586Args:
1587 projectId: string, A project id. (required)
1588 location: string, The location which contains the job specified by job_id. (required)
1589 jobId: string, The job to get messages for. (required)
1590  startTime: string, Return only metric data that has changed since this time.
1591Default is to return all information about all metrics for the job.
1592  x__xgafv: string, V1 error format.
1593    Allowed values
1594 1 - v1 error format
1595 2 - v2 error format
1596
1597Returns:
1598 An object of the form:
1599
1600     { # JobMetrics contains a collection of metrics describing the detailed progress
1601 # of a Dataflow job. Metrics correspond to user-defined and system-defined
1602 # metrics in the job.
1603 #
1604 # This resource captures only the most recent values of each metric;
1605 # time-series data can be queried for them (under the same metric names)
1606 # from Cloud Monitoring.
1607   "metrics": [ # All metrics for this job.
1608     { # Describes the state of a metric.
1609       "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1610 # This holds the count of the aggregated values and is used in combination
1611 # with mean_sum above to obtain the actual mean aggregate value.
1612 # The only possible value type is Long.
1613 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
1614 # "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".
1615 # The specified aggregation kind is case-insensitive.
1616 #
1617 # If omitted, this is not an aggregated value but instead
1618 # a single metric sample value.
1619 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
1620 # possible value type is a list of Values whose type can be Long, Double,
1621 # or String, according to the metric's type. All Values in the list must
1622 # be of the same type.
1623 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1624 # metric.
1625         "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
1626 # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1627         "name": "A String", # Worker-defined metric name.
1628         "context": { # Zero or more labeled fields which identify the part of the job this
1629 # metric is associated with, such as the name of a step or collection.
1630 #
1631 # For example, built-in counters associated with steps will have
1632 # context['step'] = <step-name>. Counters associated with PCollections
1633 # in the SDK will have context['pcollection'] = <pcollection-name>.
1634           "a_key": "A String",
1635 },
1636 },
1637       "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1638 # This holds the sum of the aggregated values and is used in combination
1639 # with mean_count below to obtain the actual mean aggregate value.
1640 # The only possible value types are Long and Double.
1641 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1642 # value accumulated since the worker started working on this WorkItem.
1643 # By default this is false, indicating that this metric is reported
1644 # as a delta that is not associated with any WorkItem.
1645 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1646 # reporting work progress; it will be filled in responses from the
1647 # metrics API.
1648 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1649 # "And", and "Or". The possible value types are Long, Double, and Boolean.
1650 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1651 # service.
1652     },
1653 ],
1654 "metricTime": "A String", # Timestamp as of which metric values are current.
1655 }</pre>
1656</div>
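<p>A sketch of reading job metrics with <code>getMetrics</code>, again under the hypothetical setup used above; <code>startTime</code> is optional and shown only to illustrate incremental polling.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

metrics = service.projects().locations().jobs().getMetrics(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-03-01_00_00_00-1234567890123456789',  # placeholder
    startTime='2017-03-01T00:00:00Z',                 # only metrics updated since this time
).execute()

# Each metric carries a structured name plus one value field (scalar, set, ...).
for metric in metrics.get('metrics', []):
    print(metric['name']['name'], metric.get('scalar'))
</pre>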
1657
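<p>The <code>list</code> method documented below paginates its results; the sketch that follows drains every page with the generated <code>list_next</code> helper, under the same hypothetical setup as the earlier examples.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
jobs_api = service.projects().locations().jobs()

request = jobs_api.list(projectId='my-project', location='us-central1')  # placeholders
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    # list_next returns None once next_page_token is absent.
    request = jobs_api.list_next(request, response)
</pre>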
1658<div class="method">
1659 <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
1660  <pre>List the jobs of a project.
1661
1662Args:
1663  projectId: string, The project which owns the jobs. (required)
1664  location: string, The location that contains this job. (required)
1665 pageSize: integer, If there are many jobs, limit response to at most this many.
1666The actual number of jobs returned will be the lesser of max_responses
1667and an unspecified server-defined limit.
1668  x__xgafv: string, V1 error format.
1669    Allowed values
1670 1 - v1 error format
1671 2 - v2 error format
1672 pageToken: string, Set this to the 'next_page_token' field of a previous response
1673to request additional results in a long list.
1674  filter: string, The kind of filter to use.
1675  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1676
1677Returns:
1678 An object of the form:
1679
1680     { # Response to a request to list Cloud Dataflow jobs. This may be a partial
1681         # response, depending on the page size in the ListJobsRequest.
1682   "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1683   "jobs": [ # A subset of the requested job information.
1684     { # Defines a job to be run by the Cloud Dataflow service.
1685 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1686 # If this field is set, the service will ensure its uniqueness.
1687 # The request to create a job will fail if the service has knowledge of a
1688 # previously submitted job with the same client's ID and job name.
1689 # The caller may use this field to ensure idempotence of job
1690 # creation across retried attempts to create a job.
1691 # By default, the field is empty and, in that case, the service ignores it.
1692 "requestedState": "A String", # The job's requested state.
1693 #
1694 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1695 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1696 # also be used to directly set a job's requested state to
1697 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1698 # job if it has not already reached a terminal state.
1699 "name": "A String", # The user-specified Cloud Dataflow job name.
1700 #
1701 # Only one Job with a given name may exist in a project at any
1702 # given time. If a caller attempts to create a Job with the same
1703 # name as an already-existing Job, the attempt returns the
1704 # existing Job.
1705 #
1706 # The name must match the regular expression
1707 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1708 "currentStateTime": "A String", # The timestamp associated with the current state.
1709 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1710 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1711 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1712 "labels": { # User-defined labels for this job.
1713 #
1714 # The labels map can contain no more than 64 entries. Entries of the labels
1715 # map are UTF8 strings that comply with the following restrictions:
1716 #
1717 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1718 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1719 # * Both keys and values are additionally constrained to be <= 128 bytes in
1720 # size.
1721         "a_key": "A String",
1722       },
1723       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1724           # corresponding name prefixes of the new job.
1725         "a_key": "A String",
1726       },
1727       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1728           # Cloud Dataflow service.
1729       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1730 "version": { # A structure describing which components and their versions of the service
1731 # are required in order to run the job.
1732           "a_key": "", # Properties of the object.
1733         },
1734         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1735             # storage. The system will append the suffix "/temp-{JOBNAME}" to
1736 # this resource prefix, where {JOBNAME} is the value of the
1737 # job_name field. The resulting bucket and object prefix is used
1738 # as the prefix of the resources used to store temporary data
1739 # needed during the job execution. NOTE: This will override the
1740 # value in taskrunner_settings.
1741 # The supported resource type is:
1742 #
1743 # Google Cloud Storage:
1744 #
1745 # storage.googleapis.com/{bucket}/{object}
1746 # bucket.storage.googleapis.com/{object}
1747         "internalExperiments": { # Experimental settings.
1748           "a_key": "", # Properties of the object. Contains field @type with type URL.
1749         },
1750         "dataset": "A String", # The dataset for the current project where various workflow
1751 # related tables are stored.
1752 #
1753 # The supported resource type is:
1754 #
1755 # Google BigQuery:
1756 # bigquery.googleapis.com/{dataset}
1757         "experiments": [ # The list of experiments to enable.
1758 "A String",
1759 ],
1760 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1761         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1762 # options are passed through the service and are used to recreate the
1763 # SDK pipeline options on the worker in a language agnostic and platform
1764 # independent way.
1765           "a_key": "", # Properties of the object.
1766 },
1767 "userAgent": { # A description of the process that generated the request.
1768 "a_key": "", # Properties of the object.
1769 },
1770         "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1771 # unspecified, the service will attempt to choose a reasonable
1772 # default. This should be in the form of the API service name,
1773 # e.g. "compute.googleapis.com".
1774 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1775 # specified in order for the job to have workers.
1776 { # Describes one particular pool of Cloud Dataflow workers to be
1777 # instantiated by the Cloud Dataflow service in order to perform the
1778 # computations required by a job. Note that a workflow job may use
1779 # multiple pools, in order to match the various computational
1780 # requirements of the various stages of the job.
1781           "diskSourceImage": "A String", # Fully qualified source image for disks.
1782           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1783 # using the standard Dataflow task runner. Users should ignore
1784 # this field.
1785 "workflowFileName": "A String", # The file to store the workflow in.
1786 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1787 # will not be uploaded.
1788 #
1789 # The supported resource type is:
1790 #
1791 # Google Cloud Storage:
1792 # storage.googleapis.com/{bucket}/{object}
1793 # bucket.storage.googleapis.com/{object}
1794             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1795                 # taskrunner; e.g. "root".
1796             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1797 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1798 "vmId": "A String", # The ID string of the VM.
1799 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1800 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1801             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1802 # access the Cloud Dataflow API.
1803 "A String",
1804 ],
1805             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1806 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1807 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1808 # "shuffle/v1beta1".
1809 "workerId": "A String", # The ID of the worker running this pipeline.
1810 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1811 #
1812 # When workers access Google Cloud APIs, they logically do so via
1813 # relative URLs. If this field is specified, it supplies the base
1814 # URL to use for resolving these relative URLs. The normative
1815 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1816 # Locators".
1817 #
1818 # If not specified, the default value is "http://www.googleapis.com/"
1819 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1820 # "dataflow/v1b3/projects".
1821 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1822 # storage.
1823 #
1824 # The supported resource type is:
1825 #
1826 # Google Cloud Storage:
1827 #
1828 # storage.googleapis.com/{bucket}/{object}
1829 # bucket.storage.googleapis.com/{object}
1830 },
1831             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1832 # taskrunner; e.g. "wheel".
1833 "languageHint": "A String", # The suggested backend language.
1834 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1835 # console.
1836 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1837 "logDir": "A String", # The directory on the VM to store logs.
1838             "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1839             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1840 #
1841 # When workers access Google Cloud APIs, they logically do so via
1842 # relative URLs. If this field is specified, it supplies the base
1843 # URL to use for resolving these relative URLs. The normative
1844 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1845 # Locators".
1846 #
1847 # If not specified, the default value is "http://www.googleapis.com/"
1848             "harnessCommand": "A String", # The command to launch the worker harness.
1849 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1850 # temporary storage.
1851                 #
1852                 # The supported resource type is:
1853 #
1854 # Google Cloud Storage:
1855 # storage.googleapis.com/{bucket}/{object}
1856 # bucket.storage.googleapis.com/{object}
1857           },
1858           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1859 # are supported.
1860 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1861 # service will attempt to choose a reasonable default.
1862 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1863 # the service will use the network "default".
1864 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1865 # will attempt to choose a reasonable default.
1866 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1867 # attempt to choose a reasonable default.
1868           "dataDisks": [ # Data disks that are used by a VM in this workflow.
1869             { # Describes the data disk used by a workflow job.
1870               "mountPoint": "A String", # Directory in a VM where disk is mounted.
1871               "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1872 # attempt to choose a reasonable default.
1873 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1874 # must be a disk type appropriate to the project and zone in which
1875 # the workers will run. If unknown or unspecified, the service
1876 # will attempt to choose a reasonable default.
1877 #
1878 # For example, the standard persistent disk type is a resource name
1879 # typically ending in "pd-standard". If SSD persistent disks are
1880 # available, the resource name typically ends with "pd-ssd". The
1881                   # actual valid values are defined by the Google Compute Engine API,
1882 # not by the Cloud Dataflow API; consult the Google Compute Engine
1883 # documentation for more information about determining the set of
1884 # available disk types for a particular project and zone.
1885 #
1886 # Google Compute Engine Disk types are local to a particular
1887 # project in a particular zone, and so the resource name will
1888 # typically look something like this:
1889 #
1890 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1891             },
1892           ],
1893           "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                  # `TEARDOWN_NEVER`.
                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                  # down.
                  #
                  # If the workers are not torn down by the service, they will
                  # continue to run and use Google Compute Engine VM resources in the
                  # user's project until they are explicitly terminated by the user.
                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                  # policy except for small, manually supervised test jobs.
                  #
                  # If unknown or unspecified, the service will attempt to choose a reasonable
                  # default.
              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                  # Compute Engine API.
              "ipConfiguration": "A String", # Configuration for VM IPs.
              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                  # service will choose a number of threads (according to the number of cores
                  # on the selected machine type for batch, or 1 by convention for streaming).
              "poolArgs": { # Extra arguments for this worker pool.
                "a_key": "", # Properties of the object. Contains field @type with type URL.
              },
              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                  # execute the job. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                  # harness, residing in Google Container Registry.
              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                  # the form "regions/REGION/subnetworks/SUBNETWORK".
              "packages": [ # Packages to be installed on workers.
                { # The packages that must be installed in order for a worker to run the
                    # steps of the Cloud Dataflow job that will be assigned to its worker
                    # pool.
                    #
                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                    # might use this to install jars containing the user's code and all of the
                    # various dependencies (libraries, data files, etc.) required in order
                    # for that code to run.
                  "location": "A String", # The resource to read the package from. The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #
                      #   storage.googleapis.com/{bucket}
                      #   bucket.storage.googleapis.com/
                  "name": "A String", # The name of the package.
                },
              ],
              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
                "algorithm": "A String", # The algorithm to use for autoscaling.
                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
              },
              "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                  # select a default set of packages which are useful to worker
                  # harnesses written in a particular language.
              "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                  # attempt to choose a reasonable default.
              "metadata": { # Metadata to set on the Google Compute Engine VMs.
                "a_key": "A String",
              },
            },
          ],
        },
        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
            # of the job it replaced.
            #
            # When sending a `CreateJobRequest`, you can update a job by specifying it
            # here. The job named here is stopped, and its intermediate state is
            # transferred to this job.
        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
            # form. This data is provided by the Dataflow service for ease of visualizing
            # the pipeline and interpreting Dataflow provided metrics.
            #
            # Preliminary field: The format of this data may change at any time.
            # A description of the user pipeline and stages through which it is executed.
            # Created by Cloud Dataflow service. Only retrieved with
            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
            { # Description of the type, names/ids, and input/outputs for a transform.
              "kind": "A String", # Type of transform.
              "name": "A String", # User provided name for this transform instance.
              "inputCollectionName": [ # User names for all collection inputs to this transform.
                "A String",
              ],
              "displayData": [ # Transform-specific display data.
                { # Data provided with a pipeline or transform to provide descriptive info.
                  "shortStrValue": "A String", # A possible additional shorter value to display.
                      # For example a java_class_name_value of com.mypackage.MyDoFn
                      # will be stored with MyDoFn as the short_str_value and
                      # com.mypackage.MyDoFn as the java_class_name value.
                      # short_str_value can be displayed and java_class_name_value
                      # will be displayed as a tooltip.
                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
                  "durationValue": "A String", # Contains value if the data is of duration type.
                  "url": "A String", # An optional full URL.
                  "floatValue": 3.14, # Contains value if the data is of float type.
                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                      # language namespace (i.e. python module) which defines the display data.
                      # This allows a dax monitoring system to specially handle the data
                      # and perform custom rendering.
                  "javaClassValue": "A String", # Contains value if the data is of java class type.
                  "label": "A String", # An optional label to display in a dax UI for the element.
                  "boolValue": True or False, # Contains value if the data is of a boolean type.
                  "strValue": "A String", # Contains value if the data is of string type.
                  "key": "A String", # The key identifying the display data.
                      # This is intended to be used as a label for the display data
                      # when viewed in a dax monitoring system.
                  "int64Value": "A String", # Contains value if the data is of int64 type.
                },
              ],
              "outputCollectionName": [ # User names for all collection outputs to this transform.
                "A String",
              ],
              "id": "A String", # SDK generated id of this transform instance.
            },
          ],
          "displayData": [ # Pipeline level display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
            { # Description of the composing transforms, names/ids, and input/outputs of a
                # stage of execution. Some composing transforms and sources may have been
                # generated by the Dataflow service during execution planning.
              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
                { # Description of an interstitial value between transforms in an execution
                    # stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "kind": "A String", # Type of transform this stage is executing.
              "name": "A String", # Dataflow service generated name for this stage.
              "outputSource": [ # Output sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "inputSource": [ # Input sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "componentTransform": [ # Transforms that comprise this execution stage.
                { # Description of a transform executed as part of an execution stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransform": "A String", # User name for the original user transform with which this transform is
                      # most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "id": "A String", # Dataflow service generated id for this stage.
            },
          ],
        },
        "steps": [ # The top-level steps that constitute the entire job.
          { # Defines a particular step within a Cloud Dataflow job.
              #
              # A job consists of multiple steps, each of which performs some
              # specific operation as part of the overall job. Data is typically
              # passed from one step to another as part of the job.
              #
              # Here's an example of a sequence of steps which together implement a
              # Map-Reduce job:
              #
              #   * Read a collection of data from some source, parsing the
              #     collection's elements.
              #
              #   * Validate the elements.
              #
              #   * Apply a user-defined function to map each element to some value
              #     and extract an element-specific key value.
              #
              #   * Group elements with the same key into a single element with
              #     that key, transforming a multiply-keyed collection into a
              #     uniquely-keyed collection.
              #
              #   * Write the elements out to some data sink.
              #
              # Note that the Cloud Dataflow service may be used to run many different
              # types of jobs, not just Map-Reduce.
            "kind": "A String", # The kind of step in the Cloud Dataflow job.
            "name": "A String", # The name that identifies the step. This must be unique for each
                # step with respect to all other steps in the Cloud Dataflow job.
            "properties": { # Named properties associated with the step. Each kind of
                # predefined step has its own required set of properties.
                # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
              "a_key": "", # Properties of the object.
            },
          },
        ],
        "location": "A String", # The location that contains this job.
        "tempFiles": [ # A set of files the system should be aware of that are used
            # for temporary storage. These temporary files will be
            # removed on job completion.
            # No duplicates are allowed.
            # No file patterns are supported.
            #
            # The supported files are:
            #
            # Google Cloud Storage:
            #
            #   storage.googleapis.com/{bucket}/{object}
            #   bucket.storage.googleapis.com/{object}
          "A String",
        ],
        "type": "A String", # The type of Cloud Dataflow job.
        "id": "A String", # The unique ID of this job.
            #
            # This field is set by the Cloud Dataflow service when the Job is
            # created, and is immutable for the life of the job.
        "currentState": "A String", # The current state of the job.
            #
            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
            # specified.
            #
            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
            # terminal state. After a job has reached a terminal state, no
            # further state updates may be made.
            #
            # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
        "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
            # will be executed that isn't contained in the submitted job.
          "stages": { # A mapping from each stage to the information about that stage.
            "a_key": { # Contains information about how a particular
                # google.dataflow.v1beta3.Step will be executed.
              "stepName": [ # The steps associated with the execution stage.
                  # Note that stages may have several steps, and that a given step
                  # might be run by more than one stage.
                "A String",
              ],
            },
          },
        },
      },
    ],
    "failedLocation": [ # Zero or more messages describing locations that failed to respond.
      { # Indicates which location failed to respond to a request for data.
        "name": "A String", # The name of the failed location.
      },
    ],
  }</pre>
</div>

<div class="method">
    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
  <pre>Retrieves the next page of results.

Args:
  previous_request: The request for the previous page. (required)
  previous_response: The response from the request for the previous page. (required)

Returns:
  A request object that you can call 'execute()' on to request the next
  page. Returns None if there are no more items in the collection.
    </pre>
</div>

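<p>For example, here is a minimal sketch (not part of the generated reference
itself) of paging through all jobs with <code>list()</code> and
<code>list_next()</code>. It assumes an already-built <code>service</code>
discovery client, a hypothetical project and location, and that the list
response carries its jobs under the <code>jobs</code> key:</p>

<pre>
request = service.projects().locations().jobs().list(
    projectId='my-project', location='us-central1')
while request is not None:
  response = request.execute()
  for job in response.get('jobs', []):
    # Each job is a dict shaped like the Job object documented above.
    print(job['id'], job.get('currentState'))
  # list_next returns None once the collection is exhausted.
  request = service.projects().locations().jobs().list_next(
      previous_request=request, previous_response=response)
</pre>
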
<div class="method">
    <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
  <pre>Updates the state of an existing Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The location that contains this job. (required)
  jobId: string, The job ID. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
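        #
        # Illustrative example (not from the API reference): to cancel a running
        # job, an update request body can contain just this field, e.g.
        #
        #   body = {"requestedState": "JOB_STATE_CANCELLED"}
        #   service.projects().locations().jobs().update(
        #       projectId="my-project", location="us-central1",
        #       jobId="2017-03-24_11_22_33-1234567890", body=body).execute()
        #
        # where `service` is a built discovery client and the project, location,
        # and job IDs above are hypothetical.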
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
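        #
        # For example (illustrative), `wordcount-batch-01` matches this
        # expression, while `WordCount` does not (uppercase is not allowed).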
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries. Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        #   size.
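        #
        # An illustrative entry (not from the reference): "environment": "staging".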
      "a_key": "A String",
    },
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
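        #
        # Illustrative example: {"oldTransformPrefix": "newTransformPrefix"},
        # where both prefixes are hypothetical names chosen for this sketch.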
      "a_key": "A String",
    },
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage. The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field. The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution. NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default. This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job. Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
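              #
              # Illustrative setting for this sketch: "teardownPolicy": "TEARDOWN_ALWAYS".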
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "algorithm": "A String", # The algorithm to use for autoscaling.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
        #
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
    },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
      },
    ],
    "location": "A String", # The location that contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
        # will be executed that isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          #   size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
2735 # this resource prefix, where {JOBNAME} is the value of the
2736 # job_name field. The resulting bucket and object prefix is used
2737 # as the prefix of the resources used to store temporary data
2738 # needed during the job execution. NOTE: This will override the
2739 # value in taskrunner_settings.
2740 # The supported resource type is:
2741 #
2742 # Google Cloud Storage:
2743 #
2744 # storage.googleapis.com/{bucket}/{object}
2745 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002746 "internalExperiments": { # Experimental settings.
2747 "a_key": "", # Properties of the object. Contains field @type with type URL.
2748 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002749 "dataset": "A String", # The dataset for the current project where various workflow
2750 # related tables are stored.
2751 #
2752 # The supported resource type is:
2753 #
2754 # Google BigQuery:
2755 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002756 "experiments": [ # The list of experiments to enable.
2757 "A String",
2758 ],
2759 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002760 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2761 # options are passed through the service and are used to recreate the
2762 # SDK pipeline options on the worker in a language agnostic and platform
2763 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002764 "a_key": "", # Properties of the object.
2765 },
2766 "userAgent": { # A description of the process that generated the request.
2767 "a_key": "", # Properties of the object.
2768 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002769 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2770 # unspecified, the service will attempt to choose a reasonable
2771 # default. This should be in the form of the API service name,
2772 # e.g. "compute.googleapis.com".
2773 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2774 # specified in order for the job to have workers.
2775 { # Describes one particular pool of Cloud Dataflow workers to be
2776 # instantiated by the Cloud Dataflow service in order to perform the
2777 # computations required by a job. Note that a workflow job may use
2778 # multiple pools, in order to match the various computational
2779 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002780 "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/".
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to the Google Compute Engine VM
                # serial console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/".
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "algorithm": "A String", # The algorithm to use for autoscaling.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
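    # Illustrative sketch (editorial, not part of the generated schema): a
    # minimal environment one might submit when creating a job, using only
    # fields documented above. The concrete values are assumptions, not
    # service defaults.
    #
    #   "environment": {
    #     "workerPools": [{
    #       "kind": "harness",
    #       "numWorkers": 3,
    #       "machineType": "n1-standard-1",
    #       "zone": "us-central1-f",
    #     }],
    #   }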
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its
        # executed form. This data is provided by the Dataflow service for ease of
        # visualizing the pipeline and interpreting Dataflow-provided metrics.
        #
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by the Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and inputs/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User-provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example, a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (e.g. a Python module) that defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK-generated ID of this transform instance.
        },
      ],
      "displayData": [ # Pipeline-level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example, a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (e.g. a Python module) that defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and inputs/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this transform.
            },
          ],
          "id": "A String", # Dataflow service generated ID for this stage.
        },
      ],
    },
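    # Note (editorial): as described above, pipelineDescription is only
    # populated when the job is fetched with JOB_VIEW_DESCRIPTION or
    # JOB_VIEW_ALL, e.g. (placeholder IDs assumed):
    #
    #   dataflow.projects().locations().jobs().get(
    #       projectId='my-project', location='us-central1',
    #       jobId='...', view='JOB_VIEW_DESCRIPTION').execute()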
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
      },
    ],
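    # Illustrative sketch (editorial; the step kinds and properties shown are
    # assumptions -- valid values are defined by the SDK that produced the job):
    #
    #   "steps": [
    #     {"kind": "ParallelRead", "name": "s1", "properties": {...}},
    #     {"kind": "ParallelDo", "name": "s2", "properties": {...}},
    #   ]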
    "location": "A String", # The location that contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Deprecated. # Additional information about how a Cloud Dataflow job will be
        # executed that isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
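<p>Below is a minimal sketch (editorial, not generated documentation) of calling this method with the Google API Python client. The project ID, location, and job ID are placeholders, and Application Default Credentials are assumed; setting <code>requestedState</code> is shown only as one example of an update body.</p>
<pre>
# Illustrative sketch -- assumes google-api-python-client is installed and
# Application Default Credentials are available in the environment.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

# Ask the service to cancel a running job by updating its requested state.
body = {'requestedState': 'JOB_STATE_CANCELLED'}

response = dataflow.projects().locations().jobs().update(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-01-01_00_00_00-1234567890123456789',  # placeholder
    body=body,
).execute()

# The response is a Job object of the form documented above.
print(response.get('currentState'))
</pre>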
</div>

</body></html>