<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
</p>
<p class="firstline">Returns the debug Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
</p>
<p class="firstline">Returns the messages Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
</p>
<p class="firstline">Returns the workItems Resource.</p>

<p class="toc_element">
  <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Request the job status.</p>
<p class="toc_element">
  <code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>
<p class="firstline">List the jobs of a project.</p>
<p class="toc_element">
  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
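<p>A minimal usage sketch for the methods above via the generated Python client. This assumes the google-api-python-client library with credentials resolved from the environment; the project and job IDs are placeholders:</p>
<pre>
from googleapiclient.discovery import build

# Build a Dataflow API client; recent client-library versions pick up
# Application Default Credentials automatically.
service = build('dataflow', 'v1b3')
jobs = service.projects().jobs()

# Page through the project's jobs, following list_next() until the
# service returns no further page.
request = jobs.list(projectId='my-project-id')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(request, response)

# Fetch one job's full definition.
job = jobs.get(projectId='my-project-id', jobId='my-job-id').execute()
</pre>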
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
  <pre>Creates a Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
      # If this field is set, the service will ensure its uniqueness.
      # The request to create a job will fail if the service has knowledge of a
      # previously submitted job with the same client's ID and job name.
      # The caller may use this field to ensure idempotence of job
      # creation across retried attempts to create a job.
      # By default, the field is empty and, in that case, the service ignores it.
  "requestedState": "A String", # The job's requested state.
      #
      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
      # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
      # also be used to directly set a job's requested state to
      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
      # job if it has not already reached a terminal state.
  "name": "A String", # The user-specified Cloud Dataflow job name.
      #
      # Only one Job with a given name may exist in a project at any
      # given time. If a caller attempts to create a Job with the same
      # name as an already-existing Job, the attempt returns the
      # existing Job.
      #
      # The name must match the regular expression
      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  "currentStateTime": "A String", # The timestamp associated with the current state.
  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
  "labels": { # User-defined labels for this job.
      #
      # The labels map can contain no more than 64 entries. Entries of the labels
      # map are UTF8 strings that comply with the following restrictions:
      #
      # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
      # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
      # * Both keys and values are additionally constrained to be <= 128 bytes in
      # size.
    "a_key": "A String",
  },
  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
      # corresponding name prefixes of the new job.
    "a_key": "A String",
  },
  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
      # Cloud Dataflow service.
  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    "version": { # A structure describing which components and their versions of the service
        # are required in order to run the job.
      "a_key": "", # Properties of the object.
    },
    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
        # storage. The system will append the suffix "/temp-{JOBNAME}" to
        # this resource prefix, where {JOBNAME} is the value of the
        # job_name field. The resulting bucket and object prefix is used
        # as the prefix of the resources used to store temporary data
        # needed during the job execution. NOTE: This will override the
        # value in taskrunner_settings.
        # The supported resource type is:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
    "internalExperiments": { # Experimental settings.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "dataset": "A String", # The dataset for the current project where various workflow
        # related tables are stored.
        #
        # The supported resource type is:
        #
        # Google BigQuery:
        #   bigquery.googleapis.com/{dataset}
    "experiments": [ # The list of experiments to enable.
      "A String",
    ],
    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
        # options are passed through the service and are used to recreate the
        # SDK pipeline options on the worker in a language agnostic and platform
        # independent way.
      "a_key": "", # Properties of the object.
    },
    "userAgent": { # A description of the process that generated the request.
      "a_key": "", # Properties of the object.
    },
    "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
        # unspecified, the service will attempt to choose a reasonable
        # default. This should be in the form of the API service name,
        # e.g. "compute.googleapis.com".
    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
        # specified in order for the job to have workers.
      { # Describes one particular pool of Cloud Dataflow workers to be
          # instantiated by the Cloud Dataflow service in order to perform the
          # computations required by a job. Note that a workflow job may use
          # multiple pools, in order to match the various computational
          # requirements of the various stages of the job.
        "diskSourceImage": "A String", # Fully qualified source image for disks.
        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
            # using the standard Dataflow task runner. Users should ignore
            # this field.
          "workflowFileName": "A String", # The file to store the workflow in.
          "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
              # will not be uploaded.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "root".
          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          "vmId": "A String", # The ID string of the VM.
          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
              # access the Cloud Dataflow API.
            "A String",
          ],
          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                # "shuffle/v1beta1".
            "workerId": "A String", # The ID of the worker running this pipeline.
            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                # "dataflow/v1b3/projects".
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "wheel".
          "languageHint": "A String", # The suggested backend language.
          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
              # console.
          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
          "logDir": "A String", # The directory on the VM to store logs.
          "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
              #
              # When workers access Google Cloud APIs, they logically do so via
              # relative URLs. If this field is specified, it supplies the base
              # URL to use for resolving these relative URLs. The normative
              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
              # Locators".
              #
              # If not specified, the default value is "http://www.googleapis.com/"
          "harnessCommand": "A String", # The command to launch the worker harness.
          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
              # temporary storage.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
        },
        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
            # are supported.
        "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
            # service will attempt to choose a reasonable default.
        "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
            # the service will use the network "default".
        "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
            # will attempt to choose a reasonable default.
        "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "dataDisks": [ # Data disks that are used by a VM in this workflow.
          { # Describes the data disk used by a workflow job.
            "mountPoint": "A String", # Directory in a VM where disk is mounted.
            "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                # must be a disk type appropriate to the project and zone in which
                # the workers will run. If unknown or unspecified, the service
                # will attempt to choose a reasonable default.
                #
                # For example, the standard persistent disk type is a resource name
                # typically ending in "pd-standard". If SSD persistent disks are
                # available, the resource name typically ends with "pd-ssd". The
                # actual valid values are defined by the Google Compute Engine API,
                # not by the Cloud Dataflow API; consult the Google Compute Engine
                # documentation for more information about determining the set of
                # available disk types for a particular project and zone.
                #
                # Google Compute Engine Disk types are local to a particular
                # project in a particular zone, and so the resource name will
                # typically look something like this:
                #
                #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
          },
        ],
        "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
            # `TEARDOWN_NEVER`.
            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
            # down.
            #
            # If the workers are not torn down by the service, they will
            # continue to run and use Google Compute Engine VM resources in the
            # user's project until they are explicitly terminated by the user.
            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
            # policy except for small, manually supervised test jobs.
            #
            # If unknown or unspecified, the service will attempt to choose a reasonable
            # default.
        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
            # Compute Engine API.
        "ipConfiguration": "A String", # Configuration for VM IPs.
        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
            # service will choose a number of threads (according to the number of cores
            # on the selected machine type for batch, or 1 by convention for streaming).
        "poolArgs": { # Extra arguments for this worker pool.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
            # execute the job. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
            # harness, residing in Google Container Registry.
        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
            # the form "regions/REGION/subnetworks/SUBNETWORK".
        "packages": [ # Packages to be installed on workers.
          { # The packages that must be installed in order for a worker to run the
              # steps of the Cloud Dataflow job that will be assigned to its worker
              # pool.
              #
              # This is the mechanism by which the Cloud Dataflow SDK causes code to
              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
              # might use this to install jars containing the user's code and all of the
              # various dependencies (libraries, data files, etc.) required in order
              # for that code to run.
            "location": "A String", # The resource to read the package from. The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}
                #   bucket.storage.googleapis.com/
            "name": "A String", # The name of the package.
          },
        ],
        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
          "algorithm": "A String", # The algorithm to use for autoscaling.
          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
        },
        "defaultPackageSet": "A String", # The default package set to install. This allows the service to
            # select a default set of packages which are useful to worker
            # harnesses written in a particular language.
        "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
            # attempt to choose a reasonable default.
        "metadata": { # Metadata to set on the Google Compute Engine VMs.
          "a_key": "A String",
        },
      },
    ],
  },
  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
      # of the job it replaced.
      #
      # When sending a `CreateJobRequest`, you can update a job by specifying it
      # here. The job named here is stopped, and its intermediate state is
      # transferred to this job.
  "pipelineDescription": { # A descriptive representation of a submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
      # A description of the user pipeline and stages through which it is executed.
      # Created by Cloud Dataflow service. Only retrieved with
      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      # form. This data is provided by the Dataflow service for ease of visualizing
      # the pipeline and interpreting Dataflow provided metrics.
    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
      { # Description of the type, names/ids, and input/outputs for a transform.
        "kind": "A String", # Type of transform.
        "name": "A String", # User provided name for this transform instance.
        "inputCollectionName": [ # User names for all collection inputs to this transform.
          "A String",
        ],
        "displayData": [ # Transform-specific display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
          },
        ],
        "outputCollectionName": [ # User names for all collection outputs to this transform.
          "A String",
        ],
        "id": "A String", # SDK generated id of this transform instance.
      },
    ],
    "displayData": [ # Pipeline level display data.
      { # Data provided with a pipeline or transform to provide descriptive info.
        "shortStrValue": "A String", # A possible additional shorter value to display.
            # For example a java_class_name_value of com.mypackage.MyDoFn
            # will be stored with MyDoFn as the short_str_value and
            # com.mypackage.MyDoFn as the java_class_name value.
            # short_str_value can be displayed and java_class_name_value
            # will be displayed as a tooltip.
        "timestampValue": "A String", # Contains value if the data is of timestamp type.
        "durationValue": "A String", # Contains value if the data is of duration type.
        "url": "A String", # An optional full URL.
        "floatValue": 3.14, # Contains value if the data is of float type.
        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
            # language namespace (i.e. python module) which defines the display data.
            # This allows a dax monitoring system to specially handle the data
            # and perform custom rendering.
        "javaClassValue": "A String", # Contains value if the data is of java class type.
        "label": "A String", # An optional label to display in a dax UI for the element.
        "boolValue": True or False, # Contains value if the data is of a boolean type.
        "strValue": "A String", # Contains value if the data is of string type.
        "key": "A String", # The key identifying the display data.
            # This is intended to be used as a label for the display data
            # when viewed in a dax monitoring system.
        "int64Value": "A String", # Contains value if the data is of int64 type.
      },
    ],
    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
      { # Description of the composing transforms, names/ids, and input/outputs of a
          # stage of execution. Some composing transforms and sources may have been
          # generated by the Dataflow service during execution planning.
        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
          { # Description of an interstitial value between transforms in an execution
              # stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
            "name": "A String", # Dataflow service generated name for this source.
          },
        ],
        "kind": "A String", # Type of transform this stage is executing.
        "name": "A String", # Dataflow service generated name for this stage.
        "outputSource": [ # Output sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "inputSource": [ # Input sources for this stage.
          { # Description of an input or output of an execution stage.
            "userName": "A String", # Human-readable name for this source; may be user or system generated.
            "sizeBytes": "A String", # Size of the source, if measurable.
            "name": "A String", # Dataflow service generated name for this source.
            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                # source is most closely associated.
          },
        ],
        "componentTransform": [ # Transforms that comprise this execution stage.
          { # Description of a transform executed as part of an execution stage.
            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
            "originalTransform": "A String", # User name for the original user transform with which this transform is
                # most closely associated.
            "name": "A String", # Dataflow service generated name for this source.
          },
        ],
        "id": "A String", # Dataflow service generated id for this stage.
      },
    ],
  },
  "steps": [ # The top-level steps that constitute the entire job.
    { # Defines a particular step within a Cloud Dataflow job.
        #
        # A job consists of multiple steps, each of which performs some
        # specific operation as part of the overall job. Data is typically
        # passed from one step to another as part of the job.
        #
        # Here's an example of a sequence of steps which together implement a
        # Map-Reduce job:
        #
        # * Read a collection of data from some source, parsing the
        #   collection's elements.
        #
        # * Validate the elements.
        #
        # * Apply a user-defined function to map each element to some value
        #   and extract an element-specific key value.
        #
        # * Group elements with the same key into a single element with
        #   that key, transforming a multiply-keyed collection into a
        #   uniquely-keyed collection.
        #
        # * Write the elements out to some data sink.
        #
        # Note that the Cloud Dataflow service may be used to run many different
        # types of jobs, not just Map-Reduce.
      "kind": "A String", # The kind of step in the Cloud Dataflow job.
      "name": "A String", # The name that identifies the step. This must be unique for each
          # step with respect to all other steps in the Cloud Dataflow job.
      "properties": { # Named properties associated with the step. Each kind of
          # predefined step has its own required set of properties.
          # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
        "a_key": "", # Properties of the object.
      },
    },
  ],
  "location": "A String", # The location that contains this job.
  "tempFiles": [ # A set of files the system should be aware of that are used
      # for temporary storage. These temporary files will be
      # removed on job completion.
      # No duplicates are allowed.
      # No file patterns are supported.
      #
      # The supported files are:
      #
      # Google Cloud Storage:
      #
      #   storage.googleapis.com/{bucket}/{object}
      #   bucket.storage.googleapis.com/{object}
    "A String",
  ],
  "type": "A String", # The type of Cloud Dataflow job.
  "id": "A String", # The unique ID of this job.
      #
      # This field is set by the Cloud Dataflow service when the Job is
      # created, and is immutable for the life of the job.
  "currentState": "A String", # The current state of the job.
      #
      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
      # specified.
      #
      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
      # terminal state. After a job has reached a terminal state, no
      # further state updates may be made.
      #
      # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
  "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
      # isn't contained in the submitted job.
    "stages": { # A mapping from each stage to the information about that stage.
      "a_key": { # Contains information about how a particular
          # google.dataflow.v1beta3.Step will be executed.
        "stepName": [ # The steps associated with the execution stage.
            # Note that stages may have several steps, and that a given step
            # might be run by more than one stage.
          "A String",
        ],
      },
    },
  },
}

  location: string, The location that contains this job.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  replaceJobId: string, Deprecated. This field is now in the Job message.
  view: string, The level of information requested in response.
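
  Example (a sketch only, not from the service documentation: the project ID,
  job name, and bucket are hypothetical placeholders, and a real job also
  needs steps and worker configuration appropriate to its type):

    body = {
        "name": "example-batch-job",
        "type": "JOB_TYPE_BATCH",
        "environment": {
            "tempStoragePrefix": "storage.googleapis.com/my-bucket/temp",
        },
    }
    response = service.projects().jobs().create(
        projectId="my-project-id", body=body).execute()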

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field. The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution. NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            #   storage.googleapis.com/{bucket}/{object}
            #   bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
            # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
            #   bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language agnostic and platform
            # independent way.
          "a_key": "", # Properties of the object.
        },
        "userAgent": { # A description of the process that generated the request.
          "a_key": "", # Properties of the object.
        },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default. This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
            # specified in order for the job to have workers.
          { # Describes one particular pool of Cloud Dataflow workers to be
              # instantiated by the Cloud Dataflow service in order to perform the
              # computations required by a job. Note that a workflow job may use
              # multiple pools, in order to match the various computational
              # requirements of the various stages of the job.
            "diskSourceImage": "A String", # Fully qualified source image for disks.
            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                # using the standard Dataflow task runner. Users should ignore
                # this field.
              "workflowFileName": "A String", # The file to store the workflow in.
              "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                  # will not be uploaded.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "root".
              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
              "vmId": "A String", # The ID string of the VM.
              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                  # access the Cloud Dataflow API.
                "A String",
              ],
              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                    # "shuffle/v1beta1".
                "workerId": "A String", # The ID of the worker running this pipeline.
                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs. If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs. The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/"
                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                    # "dataflow/v1b3/projects".
                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                    # storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}/{object}
                    #   bucket.storage.googleapis.com/{object}
              },
              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "wheel".
              "languageHint": "A String", # The suggested backend language.
              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                  # console.
              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
              "logDir": "A String", # The directory on the VM to store logs.
              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run. If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard". If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine Disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}
                    #   bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "algorithm": "A String", # The algorithm to use for autoscaling.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            },
            "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
          },
        ],
      },
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "pipelineDescription": { # A descriptive representation of a submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by Cloud Dataflow service. Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          # form. This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (i.e. python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK generated id of this transform instance.
          },
        ],
        "displayData": [ # Pipeline level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution. Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
      },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job. Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            # * Read a collection of data from some source, parsing the
            #   collection's elements.
            #
            # * Validate the elements.
            #
            # * Apply a user-defined function to map each element to some value
1023 # and extract an element-specific key value.
1024 #
1025 # * Group elements with the same key into a single element with
1026 # that key, transforming a multiply-keyed collection into a
1027 # uniquely-keyed collection.
1028 #
1029 # * Write the elements out to some data sink.
1030 #
1031 # Note that the Cloud Dataflow service may be used to run many different
1032 # types of jobs, not just Map-Reduce.
1033 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001034 "name": "A String", # The name that identifies the step. This must be unique for each
1035 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001036 "properties": { # Named properties associated with the step. Each kind of
1037 # predefined step has its own required set of properties.
1038 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07001039 "a_key": "", # Properties of the object.
1040 },
1041 },
1042 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001043 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001044 "tempFiles": [ # A set of files the system should be aware of that are used
1045 # for temporary storage. These temporary files will be
1046 # removed on job completion.
1047 # No duplicates are allowed.
1048 # No file patterns are supported.
1049 #
1050 # The supported files are:
1051 #
1052 # Google Cloud Storage:
1053 #
1054 # storage.googleapis.com/{bucket}/{object}
1055 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08001056 "A String",
1057 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001058 "type": "A String", # The type of Cloud Dataflow job.
1059 "id": "A String", # The unique ID of this job.
1060 #
1061 # This field is set by the Cloud Dataflow service when the Job is
1062 # created, and is immutable for the life of the job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001063 "currentState": "A String", # The current state of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001064 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001065 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1066 # specified.
1067 #
1068 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1069 # terminal state. After a job has reached a terminal state, no
1070 # further state updates may be made.
1071 #
1072 # This field may be mutated by the Cloud Dataflow service;
1073 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001074 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1075 # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001076 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001077 "a_key": { # Contains information about how a particular
1078 # google.dataflow.v1beta3.Step will be executed.
1079 "stepName": [ # The steps associated with the execution stage.
1080 # Note that stages may have several steps, and that a given step
1081 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001082 "A String",
1083 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001084 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001085 },
1086 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001087 }</pre>
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001088</div>
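<p>As a usage illustration (not part of the generated reference): a minimal
sketch of submitting a job with the generated Python client. It assumes the
google-api-python-client package is installed and Application Default
Credentials are configured; the project ID, bucket, and job body are
placeholders, and in practice the job body is normally constructed by a
Dataflow SDK rather than written by hand.</p>
<pre>
# Hypothetical sketch only: not a complete, runnable pipeline submission.
from googleapiclient.discovery import build

# Build a Dataflow v1b3 client using Application Default Credentials.
service = build('dataflow', 'v1b3')

job_body = {
    'name': 'example-job',       # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',    # the type of Cloud Dataflow job
    'environment': {
        # Placeholder bucket; temporary resources are created under this prefix.
        'tempStoragePrefix': 'storage.googleapis.com/my-bucket/temp',
    },
    'steps': [],                 # top-level steps, normally emitted by an SDK
}

response = service.projects().jobs().create(
    projectId='my-project', body=job_body).execute()
print(response['id'], response.get('currentState'))
</pre>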

<div class="method">
    <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
  <pre>Gets the state of the specified Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  jobId: string, The job ID. (required)
  location: string, The location that contains this job.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The level of information requested in response.

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field. The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution. NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            # storage.googleapis.com/{bucket}/{object}
            # bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
            # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
            # bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language-agnostic and
            # platform-independent way.
          "a_key": "", # Properties of the object.
        },
        "userAgent": { # A description of the process that generated the request.
          "a_key": "", # Properties of the object.
        },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default. This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
1197 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1198 # specified in order for the job to have workers.
1199 { # Describes one particular pool of Cloud Dataflow workers to be
1200 # instantiated by the Cloud Dataflow service in order to perform the
1201 # computations required by a job. Note that a workflow job may use
1202 # multiple pools, in order to match the various computational
1203 # requirements of the various stages of the job.
Takashi Matsuo06694102015-09-11 13:55:40 -07001204 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001205 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1206 # using the standard Dataflow task runner. Users should ignore
1207 # this field.
1208 "workflowFileName": "A String", # The file to store the workflow in.
1209 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1210 # will not be uploaded.
1211 #
1212 # The supported resource type is:
1213 #
1214 # Google Cloud Storage:
1215 # storage.googleapis.com/{bucket}/{object}
1216 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001217 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1218 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001219 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1220 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1221 "vmId": "A String", # The ID string of the VM.
1222 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1223 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001224 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1225 # access the Cloud Dataflow API.
1226 "A String",
1227 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001228 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1229 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1230 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1231 # "shuffle/v1beta1".
1232 "workerId": "A String", # The ID of the worker running this pipeline.
1233 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1234 #
1235 # When workers access Google Cloud APIs, they logically do so via
1236 # relative URLs. If this field is specified, it supplies the base
1237 # URL to use for resolving these relative URLs. The normative
1238 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1239 # Locators".
1240 #
1241 # If not specified, the default value is "http://www.googleapis.com/"
1242 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1243 # "dataflow/v1b3/projects".
1244 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1245 # storage.
1246 #
1247 # The supported resource type is:
1248 #
1249 # Google Cloud Storage:
1250 #
1251 # storage.googleapis.com/{bucket}/{object}
1252 # bucket.storage.googleapis.com/{object}
1253 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001254 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1255 # taskrunner; e.g. "wheel".
1256 "languageHint": "A String", # The suggested backend language.
1257 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1258 # console.
1259 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1260 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001261 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001262 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1263 #
1264 # When workers access Google Cloud APIs, they logically do so via
1265 # relative URLs. If this field is specified, it supplies the base
1266 # URL to use for resolving these relative URLs. The normative
1267 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1268 # Locators".
1269 #
1270 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001271 "harnessCommand": "A String", # The command to launch the worker harness.
1272 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1273 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001274 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001275 # The supported resource type is:
1276 #
1277 # Google Cloud Storage:
1278 # storage.googleapis.com/{bucket}/{object}
1279 # bucket.storage.googleapis.com/{object}
Takashi Matsuo06694102015-09-11 13:55:40 -07001280 },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run. If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard". If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    # storage.googleapis.com/{bucket}
                    # bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "algorithm": "A String", # The algorithm to use for autoscaling.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            },
            "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
          },
        ],
      },
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "pipelineDescription": { # Preliminary field: the format of this data may change at any time.
          # A descriptive representation of the submitted pipeline as well as its
          # executed form, created by the Cloud Dataflow service and provided for
          # ease of visualizing the pipeline and interpreting Dataflow-provided
          # metrics. Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example, a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (e.g. a Python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of Java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK-generated id of this transform instance.
          },
        ],
        "displayData": [ # Pipeline-level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example, a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (e.g. a Python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of Java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution. Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "sizeBytes": "A String", # Size of the source, if measurable.
                "name": "A String", # Dataflow service generated name for this source.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this transform.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
      },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job. Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            # * Read a collection of data from some source, parsing the
            # collection's elements.
            #
            # * Validate the elements.
            #
            # * Apply a user-defined function to map each element to some value
            # and extract an element-specific key value.
            #
            # * Group elements with the same key into a single element with
            # that key, transforming a multiply-keyed collection into a
            # uniquely-keyed collection.
            #
            # * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
        },
      ],
      "location": "A String", # The location that contains this job.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          # storage.googleapis.com/{bucket}/{object}
          # bucket.storage.googleapis.com/{object}
        "A String",
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "id": "A String", # The unique ID of this job.
          #
          # This field is set by the Cloud Dataflow service when the Job is
          # created, and is immutable for the life of the job.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
          # will be executed that isn't contained in the submitted job.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    }</pre>
</div>
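<p>As a usage illustration (not part of the generated reference): a sketch of
polling a job's state with this method. It assumes the same client setup as the
create example above; the project and job IDs are placeholders.</p>
<pre>
# Hypothetical sketch: fetch a job and inspect its current state.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

job = service.projects().jobs().get(
    projectId='my-project',
    jobId='2017-01-01_00_00_00-1234567890123456789',  # placeholder job ID
    view='JOB_VIEW_ALL').execute()                    # request full detail

# Terminal states, per the currentState documentation above.
TERMINAL = ('JOB_STATE_DONE', 'JOB_STATE_FAILED', 'JOB_STATE_CANCELLED')
if job.get('currentState') in TERMINAL:
    print('Job finished in state', job['currentState'])
else:
    print('Job still active:', job.get('currentState'))
</pre>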

<div class="method">
    <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
  <pre>Request the job status.

Args:
  projectId: string, A project id. (required)
  jobId: string, The job to get metrics for. (required)
  startTime: string, Return only metric data that has changed since this time.
Default is to return all information about all metrics for the job.
  location: string, The location which contains the job specified by job_id.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # JobMetrics contains a collection of metrics describing the detailed progress
        # of a Dataflow job. Metrics correspond to user-defined and system-defined
        # metrics in the job.
        #
        # This resource captures only the most recent values of each metric;
        # time-series data can be queried for them (under the same metric names)
        # from Cloud Monitoring.
      "metrics": [ # All metrics for this job.
        { # Describes the state of a metric.
          "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
              # This holds the count of the aggregated values and is used in combination
              # with mean_sum above to obtain the actual mean aggregate value.
              # The only possible value type is Long.
          "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
              # "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".
              # The specified aggregation kind is case-insensitive.
              #
              # If omitted, this is not an aggregated value but instead
              # a single metric sample value.
          "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
              # possible value type is a list of Values whose type can be Long, Double,
              # or String, according to the metric's type. All Values in the list must
              # be of the same type.
          "name": { # Name of the metric. Identifies a metric by describing the source
              # which generated the metric.
            "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
                # will be "dataflow" for metrics defined by the Dataflow service or SDK.
            "name": "A String", # Worker-defined metric name.
            "context": { # Zero or more labeled fields which identify the part of the job this
                # metric is associated with, such as the name of a step or collection.
                #
                # For example, built-in counters associated with steps will have
                # context['step'] = <step-name>. Counters associated with PCollections
                # in the SDK will have context['pcollection'] = <pcollection-name>.
              "a_key": "A String",
            },
          },
          "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
              # This holds the sum of the aggregated values and is used in combination
              # with mean_count below to obtain the actual mean aggregate value.
              # The only possible value types are Long and Double.
          "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
              # value accumulated since the worker started working on this WorkItem.
              # By default this is false, indicating that this metric is reported
              # as a delta that is not associated with any WorkItem.
          "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
              # reporting work progress; it will be filled in responses from the
              # metrics API.
          "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
              # "And", and "Or". The possible value types are Long, Double, and Boolean.
          "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
              # service.
        },
      ],
      "metricTime": "A String", # Timestamp as of which metric values are current.
    }</pre>
</div>
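<p>As a usage illustration (not part of the generated reference): a sketch of
reading job metrics with this method, printing only scalar aggregates. It
assumes the same client setup as above; the IDs and start time are
placeholders.</p>
<pre>
# Hypothetical sketch: print the most recent scalar metric values for a job.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

result = service.projects().jobs().getMetrics(
    projectId='my-project',
    jobId='2017-01-01_00_00_00-1234567890123456789',  # placeholder job ID
    startTime='2017-01-01T00:00:00Z').execute()       # only metrics changed since then

for metric in result.get('metrics', []):
    # 'scalar' holds "Sum", "Max", "Min", "And", and "Or" aggregates; other
    # aggregation kinds are reported in other fields (set, meanSum/meanCount).
    if 'scalar' in metric:
        print(metric['name']['name'], '=', metric['scalar'])
</pre>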

<div class="method">
    <code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>
  <pre>List the jobs of a project.

Args:
  projectId: string, The project which owns the jobs. (required)
  pageSize: integer, If there are many jobs, limit response to at most this many.
The actual number of jobs returned will be the lesser of max_responses
and an unspecified server-defined limit.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  pageToken: string, Set this to the 'next_page_token' field of a previous response
to request additional results in a long list.
  location: string, The location that contains this job.
  filter: string, The kind of filter to use.
  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.

Returns:
  An object of the form:

    { # Response to a request to list Cloud Dataflow jobs. This may be a partial
        # response, depending on the page size in the ListJobsRequest.
      "nextPageToken": "A String", # Set if there may be more results than fit in this response.
      "jobs": [ # A subset of the requested job information.
        { # Defines a job to be run by the Cloud Dataflow service.
          "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
              # If this field is set, the service will ensure its uniqueness.
              # The request to create a job will fail if the service has knowledge of a
              # previously submitted job with the same client's ID and job name.
              # The caller may use this field to ensure idempotence of job
              # creation across retried attempts to create a job.
              # By default, the field is empty and, in that case, the service ignores it.
          "requestedState": "A String", # The job's requested state.
              #
              # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
              # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
              # also be used to directly set a job's requested state to
              # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
              # job if it has not already reached a terminal state.
          "name": "A String", # The user-specified Cloud Dataflow job name.
              #
              # Only one Job with a given name may exist in a project at any
              # given time. If a caller attempts to create a Job with the same
              # name as an already-existing Job, the attempt returns the
              # existing Job.
              #
              # The name must match the regular expression
              # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
          "currentStateTime": "A String", # The timestamp associated with the current state.
          "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
              # `JOB_STATE_UPDATED`), this field contains the ID of that job.
          "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
          "labels": { # User-defined labels for this job.
              #
              # The labels map can contain no more than 64 entries. Entries of the labels
              # map are UTF8 strings that comply with the following restrictions:
              #
              # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
              # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
              # * Both keys and values are additionally constrained to be <= 128 bytes in
              # size.
            "a_key": "A String",
          },
          "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
              # corresponding name prefixes of the new job.
            "a_key": "A String",
          },
          "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
              # Cloud Dataflow service.
          "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
            "version": { # A structure describing which components and their versions of the service
                # are required in order to run the job.
              "a_key": "", # Properties of the object.
            },
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage. The system will append the suffix "/temp-{JOBNAME}" to
                # this resource prefix, where {JOBNAME} is the value of the
                # job_name field. The resulting bucket and object prefix is used
                # as the prefix of the resources used to store temporary data
                # needed during the job execution. NOTE: This will override the
                # value in taskrunner_settings.
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                # storage.googleapis.com/{bucket}/{object}
                # bucket.storage.googleapis.com/{object}
            "internalExperiments": { # Experimental settings.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "dataset": "A String", # The dataset for the current project where various workflow
                # related tables are stored.
                #
                # The supported resource type is:
                #
                # Google BigQuery:
                # bigquery.googleapis.com/{dataset}
            "experiments": [ # The list of experiments to enable.
              "A String",
            ],
            "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
            "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
                # options are passed through the service and are used to recreate the
                # SDK pipeline options on the worker in a language-agnostic and
                # platform-independent way.
              "a_key": "", # Properties of the object.
            },
            "userAgent": { # A description of the process that generated the request.
              "a_key": "", # Properties of the object.
            },
            "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
                # unspecified, the service will attempt to choose a reasonable
                # default. This should be in the form of the API service name,
                # e.g. "compute.googleapis.com".
            "workerPools": [ # The worker pools. At least one "harness" worker pool must be
                # specified in order for the job to have workers.
              { # Describes one particular pool of Cloud Dataflow workers to be
                  # instantiated by the Cloud Dataflow service in order to perform the
                  # computations required by a job. Note that a workflow job may use
                  # multiple pools, in order to match the various computational
                  # requirements of the various stages of the job.
                "diskSourceImage": "A String", # Fully qualified source image for disks.
                "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                    # using the standard Dataflow task runner. Users should ignore
                    # this field.
                  "workflowFileName": "A String", # The file to store the workflow in.
                  "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                      # will not be uploaded.
                      #
                      # The supported resource type is:
                      #
                      # Google Cloud Storage:
                      # storage.googleapis.com/{bucket}/{object}
                      # bucket.storage.googleapis.com/{object}
                  "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                      # taskrunner; e.g. "root".
                  "commandlinesFileName": "A String", # The file to store preprocessing commands in.
                  "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
                  "vmId": "A String", # The ID string of the VM.
                  "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
                  "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
                  "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                      # access the Cloud Dataflow API.
                    "A String",
                  ],
                  "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                    "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                    "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                        # "shuffle/v1beta1".
                    "workerId": "A String", # The ID of the worker running this pipeline.
                    "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                        #
                        # When workers access Google Cloud APIs, they logically do so via
                        # relative URLs. If this field is specified, it supplies the base
                        # URL to use for resolving these relative URLs. The normative
                        # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                        # Locators".
                        #
                        # If not specified, the default value is "http://www.googleapis.com/"
                    "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                        # "dataflow/v1b3/projects".
                    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                        # storage.
                        #
                        # The supported resource type is:
                        #
                        # Google Cloud Storage:
                        #
                        # storage.googleapis.com/{bucket}/{object}
                        # bucket.storage.googleapis.com/{object}
                  },
                  "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                      # taskrunner; e.g. "wheel".
                  "languageHint": "A String", # The suggested backend language.
                  "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                      # console.
                  "streamingWorkerMainClass": "A String", # The streaming worker main class name.
                  "logDir": "A String", # The directory on the VM to store logs.
                  "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
                  "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                      #
                      # When workers access Google Cloud APIs, they logically do so via
                      # relative URLs. If this field is specified, it supplies the base
                      # URL to use for resolving these relative URLs. The normative
                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                      # Locators".
                      #
                      # If not specified, the default value is "http://www.googleapis.com/"
                  "harnessCommand": "A String", # The command to launch the worker harness.
                  "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                      # temporary storage.
                      #
                      # The supported resource type is:
                      #
                      # Google Cloud Storage:
                      # storage.googleapis.com/{bucket}/{object}
                      # bucket.storage.googleapis.com/{object}
                },
Sai Cheemalapatie833b792017-03-24 15:06:46 -07001863 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1864 # are supported.
1865 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1866 # service will attempt to choose a reasonable default.
1867 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1868 # the service will use the network "default".
1869 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1870 # will attempt to choose a reasonable default.
1871 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1872 # attempt to choose a reasonable default.
Takashi Matsuo06694102015-09-11 13:55:40 -07001873 "dataDisks": [ # Data disks that are used by a VM in this workflow.
1874 { # Describes the data disk used by a workflow job.
1875 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1876 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1877 # attempt to choose a reasonable default.
1878 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1879 # must be a disk type appropriate to the project and zone in which
1880 # the workers will run. If unknown or unspecified, the service
1881 # will attempt to choose a reasonable default.
1882 #
1883 # For example, the standard persistent disk type is a resource name
1884 # typically ending in "pd-standard". If SSD persistent disks are
1885 # available, the resource name typically ends with "pd-ssd". The
1886 # actual valid values are defined by the Google Compute Engine API,
1887 # not by the Cloud Dataflow API; consult the Google Compute Engine
1888 # documentation for more information about determining the set of
1889 # available disk types for a particular project and zone.
1890 #
1891 # Google Compute Engine Disk types are local to a particular
1892 # project in a particular zone, and so the resource name will
1893 # typically look something like this:
1894 #
1895 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1896 },
1897 ],
1898 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
1899 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1900 # `TEARDOWN_NEVER`.
1901 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1902 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1903 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1904 # down.
1905 #
1906 # If the workers are not torn down by the service, they will
1907 # continue to run and use Google Compute Engine VM resources in the
1908 # user's project until they are explicitly terminated by the user.
1909 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1910 # policy except for small, manually supervised test jobs.
1911 #
1912 # If unknown or unspecified, the service will attempt to choose a reasonable
1913 # default.
1914 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1915 # Compute Engine API.
1916 "ipConfiguration": "A String", # Configuration for VM IPs.
1917 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1918 # service will choose a number of threads (according to the number of cores
1919 # on the selected machine type for batch, or 1 by convention for streaming).
1920 "poolArgs": { # Extra arguments for this worker pool.
1921 "a_key": "", # Properties of the object. Contains field @type with type URL.
1922 },
1923 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1924 # execute the job. If zero or unspecified, the service will
1925 # attempt to choose a reasonable default.
1926 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1927 # harness, residing in Google Container Registry.
1928 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1929 # the form "regions/REGION/subnetworks/SUBNETWORK".
1930 "packages": [ # Packages to be installed on workers.
1931 { # The packages that must be installed in order for a worker to run the
1932 # steps of the Cloud Dataflow job that will be assigned to its worker
1933 # pool.
1934 #
1935 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1936 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1937 # might use this to install jars containing the user's code and all of the
1938 # various dependencies (libraries, data files, etc.) required in order
1939 # for that code to run.
1940 "location": "A String", # The resource to read the package from. The supported resource type is:
1941 #
1942 # Google Cloud Storage:
1943 #
1944 # storage.googleapis.com/{bucket}
1945 # bucket.storage.googleapis.com/
1946 "name": "A String", # The name of the package.
1947 },
1948 ],
1949 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1950 "algorithm": "A String", # The algorithm to use for autoscaling.
1951 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1952 },
1953 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1954 # select a default set of packages which are useful to worker
1955 # harnesses written in a particular language.
1956 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1957 # attempt to choose a reasonable default.
1958 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1959 "a_key": "A String",
1960 },
1961 },
1962 ],
1963 },
1964 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1965 # of the job it replaced.
1966 #
1967 # When sending a `CreateJobRequest`, you can update a job by specifying it
1968 # here. The job named here is stopped, and its intermediate state is
1969 # transferred to this job.
1970 "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its executed form. # Preliminary field: The format of this data may change at any time.
1971 # This data is provided by the Dataflow service for ease of visualizing
1972 # the pipeline and interpreting Dataflow-provided metrics.
1973 # A description of the user pipeline and the stages through which it is executed.
1974 # Created by the Cloud Dataflow service. Only retrieved with
1975 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1976 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1977 { # Description of the type, names/ids, and input/outputs for a transform.
1978 "kind": "A String", # Type of transform.
1979 "name": "A String", # User provided name for this transform instance.
1980 "inputCollectionName": [ # User names for all collection inputs to this transform.
1981 "A String",
1982 ],
1983 "displayData": [ # Transform-specific display data.
1984 { # Data provided with a pipeline or transform to provide descriptive info.
1985 "shortStrValue": "A String", # A possible additional shorter value to display.
1986 # For example a java_class_name_value of com.mypackage.MyDoFn
1987 # will be stored with MyDoFn as the short_str_value and
1988 # com.mypackage.MyDoFn as the java_class_name value.
1989 # short_str_value can be displayed and java_class_name_value
1990 # will be displayed as a tooltip.
1991 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1992 "durationValue": "A String", # Contains value if the data is of duration type.
1993 "url": "A String", # An optional full URL.
1994 "floatValue": 3.14, # Contains value if the data is of float type.
1995 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1996 # language namespace (e.g. a Python module) which defines the display data.
1997 # This allows a dax monitoring system to specially handle the data
1998 # and perform custom rendering.
1999 "javaClassValue": "A String", # Contains value if the data is of java class type.
2000 "label": "A String", # An optional label to display in a dax UI for the element.
2001 "boolValue": True or False, # Contains value if the data is of a boolean type.
2002 "strValue": "A String", # Contains value if the data is of string type.
2003 "key": "A String", # The key identifying the display data.
2004 # This is intended to be used as a label for the display data
2005 # when viewed in a dax monitoring system.
2006 "int64Value": "A String", # Contains value if the data is of int64 type.
2007 },
2008 ],
2009 "outputCollectionName": [ # User names for all collection outputs to this transform.
2010 "A String",
2011 ],
2012 "id": "A String", # SDK generated id of this transform instance.
2013 },
2014 ],
2015 "displayData": [ # Pipeline level display data.
2016 { # Data provided with a pipeline or transform to provide descriptive info.
2017 "shortStrValue": "A String", # A possible additional shorter value to display.
2018 # For example a java_class_name_value of com.mypackage.MyDoFn
2019 # will be stored with MyDoFn as the short_str_value and
2020 # com.mypackage.MyDoFn as the java_class_name value.
2021 # short_str_value can be displayed and java_class_name_value
2022 # will be displayed as a tooltip.
2023 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2024 "durationValue": "A String", # Contains value if the data is of duration type.
2025 "url": "A String", # An optional full URL.
2026 "floatValue": 3.14, # Contains value if the data is of float type.
2027 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2028 # language namespace (e.g. a Python module) which defines the display data.
2029 # This allows a dax monitoring system to specially handle the data
2030 # and perform custom rendering.
2031 "javaClassValue": "A String", # Contains value if the data is of java class type.
2032 "label": "A String", # An optional label to display in a dax UI for the element.
2033 "boolValue": True or False, # Contains value if the data is of a boolean type.
2034 "strValue": "A String", # Contains value if the data is of string type.
2035 "key": "A String", # The key identifying the display data.
2036 # This is intended to be used as a label for the display data
2037 # when viewed in a dax monitoring system.
2038 "int64Value": "A String", # Contains value if the data is of int64 type.
2039 },
2040 ],
2041 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2042 { # Description of the composing transforms, names/ids, and input/outputs of a
2043 # stage of execution. Some composing transforms and sources may have been
2044 # generated by the Dataflow service during execution planning.
2045 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2046 { # Description of an interstitial value between transforms in an execution
2047 # stage.
2048 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2049 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2050 # source is most closely associated.
2051 "name": "A String", # Dataflow service generated name for this source.
2052 },
2053 ],
2054 "kind": "A String", # Type of tranform this stage is executing.
2055 "name": "A String", # Dataflow service generated name for this stage.
2056 "outputSource": [ # Output sources for this stage.
2057 { # Description of an input or output of an execution stage.
2058 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2059 "sizeBytes": "A String", # Size of the source, if measurable.
2060 "name": "A String", # Dataflow service generated name for this source.
2061 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2062 # source is most closely associated.
2063 },
2064 ],
2065 "inputSource": [ # Input sources for this stage.
2066 { # Description of an input or output of an execution stage.
2067 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2068 "sizeBytes": "A String", # Size of the source, if measurable.
2069 "name": "A String", # Dataflow service generated name for this source.
2070 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2071 # source is most closely associated.
2072 },
2073 ],
2074 "componentTransform": [ # Transforms that comprise this execution stage.
2075 { # Description of a transform executed as part of an execution stage.
2076 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2077 "originalTransform": "A String", # User name for the original user transform with which this transform is
2078 # most closely associated.
2079 "name": "A String", # Dataflow service generated name for this source.
2080 },
2081 ],
2082 "id": "A String", # Dataflow service generated id for this stage.
2083 },
2084 ],
2085 },
2086 "steps": [ # The top-level steps that constitute the entire job.
2087 { # Defines a particular step within a Cloud Dataflow job.
2088 #
2089 # A job consists of multiple steps, each of which performs some
2090 # specific operation as part of the overall job. Data is typically
2091 # passed from one step to another as part of the job.
2092 #
2093 # Here's an example of a sequence of steps which together implement a
2094 # Map-Reduce job:
2095 #
2096 # * Read a collection of data from some source, parsing the
2097 # collection's elements.
2098 #
2099 # * Validate the elements.
2100 #
2101 # * Apply a user-defined function to map each element to some value
2102 # and extract an element-specific key value.
2103 #
2104 # * Group elements with the same key into a single element with
2105 # that key, transforming a multiply-keyed collection into a
2106 # uniquely-keyed collection.
2107 #
2108 # * Write the elements out to some data sink.
2109 #
2110 # Note that the Cloud Dataflow service may be used to run many different
2111 # types of jobs, not just Map-Reduce.
2112 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2113 "name": "A String", # The name that identifies the step. This must be unique for each
2114 # step with respect to all other steps in the Cloud Dataflow job.
2115 "properties": { # Named properties associated with the step. Each kind of
2116 # predefined step has its own required set of properties.
2117 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
2118 "a_key": "", # Properties of the object.
2119 },
2120 },
2121 ],
2122 "location": "A String", # The location that contains this job.
2123 "tempFiles": [ # A set of files the system should be aware of that are used
2124 # for temporary storage. These temporary files will be
2125 # removed on job completion.
2126 # No duplicates are allowed.
2127 # No file patterns are supported.
2128 #
2129 # The supported files are:
2130 #
2131 # Google Cloud Storage:
2132 #
2133 # storage.googleapis.com/{bucket}/{object}
2134 # bucket.storage.googleapis.com/{object}
2135 "A String",
2136 ],
2137 "type": "A String", # The type of Cloud Dataflow job.
2138 "id": "A String", # The unique ID of this job.
2139 #
2140 # This field is set by the Cloud Dataflow service when the Job is
2141 # created, and is immutable for the life of the job.
2142 "currentState": "A String", # The current state of the job.
2143 #
2144 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2145 # specified.
2146 #
2147 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2148 # terminal state. After a job has reached a terminal state, no
2149 # further state updates may be made.
2150 #
2151 # This field may be mutated by the Cloud Dataflow service;
2152 # callers cannot mutate it.
2153 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2154 # isn't contained in the submitted job.
2155 "stages": { # A mapping from each stage to the information about that stage.
2156 "a_key": { # Contains information about how a particular
2157 # google.dataflow.v1beta3.Step will be executed.
2158 "stepName": [ # The steps associated with the execution stage.
2159 # Note that stages may have several steps, and that a given step
2160 # might be run by more than one stage.
2161 "A String",
2162 ],
2163 },
2164 },
2165 },
2166 },
2167 ],
2168 "failedLocation": [ # Zero or more messages describing locations that failed to respond.
2169 { # Indicates which location failed to respond to a request for data.
2170 "name": "A String", # The name of the failed location.
2171 },
2172 ],
2173 }</pre>
2174</div>
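<p>Example: a minimal sketch of calling <code>list()</code> and reading the response shown above. It assumes a <code>dataflow</code> service object built with <code>googleapiclient.discovery.build</code> and ambient credentials; the project ID is an illustrative placeholder, not part of the API reference.</p>
<pre>
from googleapiclient.discovery import build

# Build the Dataflow API client; credentials are resolved from the environment.
dataflow = build('dataflow', 'v1b3')

response = dataflow.projects().jobs().list(
    projectId='my-project', view='JOB_VIEW_SUMMARY').execute()

# 'jobs' may be absent when the project has no jobs.
for job in response.get('jobs', []):
  print(job['id'], job['name'], job.get('currentState'))

# Per the response schema, some locations may fail to respond.
for failed in response.get('failedLocation', []):
  print('No data from location:', failed['name'])
</pre>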
2175
2176<div class="method">
2177 <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2178 <pre>Retrieves the next page of results.
2179
2180Args:
2181 previous_request: The request for the previous page. (required)
2182 previous_response: The response from the request for the previous page. (required)
2183
2184Returns:
2185 A request object that you can call 'execute()' on to request the next
2186 page. Returns None if there are no more items in the collection.
2187 </pre>
2188</div>
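<p>Example: a sketch of paging through all jobs with <code>list_next()</code>, following the request/response protocol described above. The <code>dataflow</code> service object and project ID are assumed placeholders.</p>
<pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

request = dataflow.projects().jobs().list(projectId='my-project')
while request is not None:
  response = request.execute()
  for job in response.get('jobs', []):
    print(job['id'], job.get('currentState'))
  # list_next returns None once there are no more items in the collection.
  request = dataflow.projects().jobs().list_next(request, response)
</pre>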
2189
2190<div class="method">
2191    <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
2192  <pre>Updates the state of an existing Cloud Dataflow job.
2193
2194Args:
2195  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2196 jobId: string, The job ID. (required)
2197  body: object, The request body. (required)
2198 The object takes the form of:
2199
2200 { # Defines a job to be run by the Cloud Dataflow service.
2201 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2202 # If this field is set, the service will ensure its uniqueness.
2203 # The request to create a job will fail if the service has knowledge of a
2204 # previously submitted job with the same client's ID and job name.
2205 # The caller may use this field to ensure idempotence of job
2206 # creation across retried attempts to create a job.
2207 # By default, the field is empty and, in that case, the service ignores it.
2208 "requestedState": "A String", # The job's requested state.
2209 #
2210 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2211 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2212 # also be used to directly set a job's requested state to
2213 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2214 # job if it has not already reached a terminal state.
2215 "name": "A String", # The user-specified Cloud Dataflow job name.
2216 #
2217 # Only one Job with a given name may exist in a project at any
2218 # given time. If a caller attempts to create a Job with the same
2219 # name as an already-existing Job, the attempt returns the
2220 # existing Job.
2221 #
2222 # The name must match the regular expression
2223 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2224 "currentStateTime": "A String", # The timestamp associated with the current state.
2225 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2226 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2227 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2228 "labels": { # User-defined labels for this job.
2229 #
2230 # The labels map can contain no more than 64 entries. Entries of the labels
2231 # map are UTF8 strings that comply with the following restrictions:
2232 #
2233 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2234 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2235 # * Both keys and values are additionally constrained to be <= 128 bytes in
2236 # size.
2237 "a_key": "A String",
2238 },
2239 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2240 # corresponding name prefixes of the new job.
2241 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002242 },
2243 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2244 # Cloud Dataflow service.
2245 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2246 "version": { # A structure describing which components and their versions of the service
2247 # are required in order to run the job.
2248 "a_key": "", # Properties of the object.
2249 },
2250 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2251 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2252 # this resource prefix, where {JOBNAME} is the value of the
2253 # job_name field. The resulting bucket and object prefix is used
2254 # as the prefix of the resources used to store temporary data
2255 # needed during the job execution. NOTE: This will override the
2256 # value in taskrunner_settings.
2257 # The supported resource type is:
2258 #
2259 # Google Cloud Storage:
2260 #
2261 # storage.googleapis.com/{bucket}/{object}
2262 # bucket.storage.googleapis.com/{object}
2263 "internalExperiments": { # Experimental settings.
2264 "a_key": "", # Properties of the object. Contains field @type with type URL.
2265 },
2266 "dataset": "A String", # The dataset for the current project where various workflow
2267 # related tables are stored.
2268 #
2269 # The supported resource type is:
2270 #
2271 # Google BigQuery:
2272 # bigquery.googleapis.com/{dataset}
2273 "experiments": [ # The list of experiments to enable.
2274 "A String",
2275 ],
2276 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2277 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2278 # options are passed through the service and are used to recreate the
2279 # SDK pipeline options on the worker in a language agnostic and platform
2280 # independent way.
2281 "a_key": "", # Properties of the object.
2282 },
2283 "userAgent": { # A description of the process that generated the request.
2284 "a_key": "", # Properties of the object.
2285 },
2286 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2287 # unspecified, the service will attempt to choose a reasonable
2288 # default. This should be in the form of the API service name,
2289 # e.g. "compute.googleapis.com".
2290 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2291 # specified in order for the job to have workers.
2292 { # Describes one particular pool of Cloud Dataflow workers to be
2293 # instantiated by the Cloud Dataflow service in order to perform the
2294 # computations required by a job. Note that a workflow job may use
2295 # multiple pools, in order to match the various computational
2296 # requirements of the various stages of the job.
2297 "diskSourceImage": "A String", # Fully qualified source image for disks.
2298 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2299 # using the standard Dataflow task runner. Users should ignore
2300 # this field.
2301 "workflowFileName": "A String", # The file to store the workflow in.
2302 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2303 # will not be uploaded.
2304 #
2305 # The supported resource type is:
2306 #
2307 # Google Cloud Storage:
2308 # storage.googleapis.com/{bucket}/{object}
2309 # bucket.storage.googleapis.com/{object}
2310 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2311 # taskrunner; e.g. "root".
2312 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2313 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2314 "vmId": "A String", # The ID string of the VM.
2315 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2316 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2317 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2318 # access the Cloud Dataflow API.
2319 "A String",
2320 ],
2321 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2322 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2323 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2324 # "shuffle/v1beta1".
2325 "workerId": "A String", # The ID of the worker running this pipeline.
2326 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2327 #
2328 # When workers access Google Cloud APIs, they logically do so via
2329 # relative URLs. If this field is specified, it supplies the base
2330 # URL to use for resolving these relative URLs. The normative
2331 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2332 # Locators".
2333 #
2334 # If not specified, the default value is "http://www.googleapis.com/"
2335 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2336 # "dataflow/v1b3/projects".
2337 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2338 # storage.
2339 #
2340 # The supported resource type is:
2341 #
2342 # Google Cloud Storage:
2343 #
2344 # storage.googleapis.com/{bucket}/{object}
2345 # bucket.storage.googleapis.com/{object}
2346 },
2347 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2348 # taskrunner; e.g. "wheel".
2349 "languageHint": "A String", # The suggested backend language.
2350 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2351 # console.
2352 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2353 "logDir": "A String", # The directory on the VM to store logs.
2354 "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3"
2355 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2356 #
2357 # When workers access Google Cloud APIs, they logically do so via
2358 # relative URLs. If this field is specified, it supplies the base
2359 # URL to use for resolving these relative URLs. The normative
2360 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2361 # Locators".
2362 #
2363 # If not specified, the default value is "http://www.googleapis.com/"
2364 "harnessCommand": "A String", # The command to launch the worker harness.
2365 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2366 # temporary storage.
2367 #
2368 # The supported resource type is:
2369 #
2370 # Google Cloud Storage:
2371 # storage.googleapis.com/{bucket}/{object}
2372 # bucket.storage.googleapis.com/{object}
2373 },
2374 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2375 # are supported.
2376 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
2377 # service will attempt to choose a reasonable default.
2378 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
2379 # the service will use the network "default".
2380 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
2381 # will attempt to choose a reasonable default.
2382 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
2383 # attempt to choose a reasonable default.
2384 "dataDisks": [ # Data disks that are used by a VM in this workflow.
2385 { # Describes the data disk used by a workflow job.
2386 "mountPoint": "A String", # Directory in a VM where disk is mounted.
2387 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
2388 # attempt to choose a reasonable default.
2389 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
2390 # must be a disk type appropriate to the project and zone in which
2391 # the workers will run. If unknown or unspecified, the service
2392 # will attempt to choose a reasonable default.
2393 #
2394 # For example, the standard persistent disk type is a resource name
2395 # typically ending in "pd-standard". If SSD persistent disks are
2396 # available, the resource name typically ends with "pd-ssd". The
2397 # actual valid values are defined by the Google Compute Engine API,
2398 # not by the Cloud Dataflow API; consult the Google Compute Engine
2399 # documentation for more information about determining the set of
2400 # available disk types for a particular project and zone.
2401 #
2402 # Google Compute Engine Disk types are local to a particular
2403 # project in a particular zone, and so the resource name will
2404 # typically look something like this:
2405 #
2406 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2407 },
2408 ],
2409 "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
2410 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2411 # `TEARDOWN_NEVER`.
2412 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2413 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2414 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2415 # down.
2416 #
2417 # If the workers are not torn down by the service, they will
2418 # continue to run and use Google Compute Engine VM resources in the
2419 # user's project until they are explicitly terminated by the user.
2420 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2421 # policy except for small, manually supervised test jobs.
2422 #
2423 # If unknown or unspecified, the service will attempt to choose a reasonable
2424 # default.
2425 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2426 # Compute Engine API.
2427 "ipConfiguration": "A String", # Configuration for VM IPs.
2428 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2429 # service will choose a number of threads (according to the number of cores
2430 # on the selected machine type for batch, or 1 by convention for streaming).
2431 "poolArgs": { # Extra arguments for this worker pool.
2432 "a_key": "", # Properties of the object. Contains field @type with type URL.
2433 },
2434 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2435 # execute the job. If zero or unspecified, the service will
2436 # attempt to choose a reasonable default.
2437 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2438 # harness, residing in Google Container Registry.
2439 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
2440 # the form "regions/REGION/subnetworks/SUBNETWORK".
2441 "packages": [ # Packages to be installed on workers.
2442 { # The packages that must be installed in order for a worker to run the
2443 # steps of the Cloud Dataflow job that will be assigned to its worker
2444 # pool.
2445 #
2446 # This is the mechanism by which the Cloud Dataflow SDK causes code to
2447 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2448 # might use this to install jars containing the user's code and all of the
2449 # various dependencies (libraries, data files, etc.) required in order
2450 # for that code to run.
2451 "location": "A String", # The resource to read the package from. The supported resource type is:
2452 #
2453 # Google Cloud Storage:
2454 #
2455 # storage.googleapis.com/{bucket}
2456 # bucket.storage.googleapis.com/
2457 "name": "A String", # The name of the package.
2458 },
2459 ],
2460 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2461 "algorithm": "A String", # The algorithm to use for autoscaling.
2462 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2463 },
2464 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2465 # select a default set of packages which are useful to worker
2466 # harnesses written in a particular language.
2467 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
2468 # attempt to choose a reasonable default.
2469 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2470 "a_key": "A String",
2471 },
2472 },
2473 ],
2474 },
2475 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2476 # of the job it replaced.
2477 #
2478 # When sending a `CreateJobRequest`, you can update a job by specifying it
2479 # here. The job named here is stopped, and its intermediate state is
2480 # transferred to this job.
2481 "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its executed form. # Preliminary field: The format of this data may change at any time.
2482 # This data is provided by the Dataflow service for ease of visualizing
2483 # the pipeline and interpreting Dataflow-provided metrics.
2484 # A description of the user pipeline and the stages through which it is executed.
2485 # Created by the Cloud Dataflow service. Only retrieved with
2486 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2487 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2488 { # Description of the type, names/ids, and input/outputs for a transform.
2489 "kind": "A String", # Type of transform.
2490 "name": "A String", # User provided name for this transform instance.
2491 "inputCollectionName": [ # User names for all collection inputs to this transform.
2492 "A String",
2493 ],
2494 "displayData": [ # Transform-specific display data.
2495 { # Data provided with a pipeline or transform to provide descriptive info.
2496 "shortStrValue": "A String", # A possible additional shorter value to display.
2497 # For example a java_class_name_value of com.mypackage.MyDoFn
2498 # will be stored with MyDoFn as the short_str_value and
2499 # com.mypackage.MyDoFn as the java_class_name value.
2500 # short_str_value can be displayed and java_class_name_value
2501 # will be displayed as a tooltip.
2502 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2503 "durationValue": "A String", # Contains value if the data is of duration type.
2504 "url": "A String", # An optional full URL.
2505 "floatValue": 3.14, # Contains value if the data is of float type.
2506 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2507 # language namespace (e.g. a Python module) which defines the display data.
2508 # This allows a dax monitoring system to specially handle the data
2509 # and perform custom rendering.
2510 "javaClassValue": "A String", # Contains value if the data is of java class type.
2511 "label": "A String", # An optional label to display in a dax UI for the element.
2512 "boolValue": True or False, # Contains value if the data is of a boolean type.
2513 "strValue": "A String", # Contains value if the data is of string type.
2514 "key": "A String", # The key identifying the display data.
2515 # This is intended to be used as a label for the display data
2516 # when viewed in a dax monitoring system.
2517 "int64Value": "A String", # Contains value if the data is of int64 type.
2518 },
2519 ],
2520 "outputCollectionName": [ # User names for all collection outputs to this transform.
2521 "A String",
2522 ],
2523 "id": "A String", # SDK generated id of this transform instance.
2524 },
2525 ],
2526 "displayData": [ # Pipeline level display data.
2527 { # Data provided with a pipeline or transform to provide descriptive info.
2528 "shortStrValue": "A String", # A possible additional shorter value to display.
2529 # For example a java_class_name_value of com.mypackage.MyDoFn
2530 # will be stored with MyDoFn as the short_str_value and
2531 # com.mypackage.MyDoFn as the java_class_name value.
2532 # short_str_value can be displayed and java_class_name_value
2533 # will be displayed as a tooltip.
2534 "timestampValue": "A String", # Contains value if the data is of timestamp type.
2535 "durationValue": "A String", # Contains value if the data is of duration type.
2536 "url": "A String", # An optional full URL.
2537 "floatValue": 3.14, # Contains value if the data is of float type.
2538 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2539 # language namespace (e.g. a Python module) which defines the display data.
2540 # This allows a dax monitoring system to specially handle the data
2541 # and perform custom rendering.
2542 "javaClassValue": "A String", # Contains value if the data is of java class type.
2543 "label": "A String", # An optional label to display in a dax UI for the element.
2544 "boolValue": True or False, # Contains value if the data is of a boolean type.
2545 "strValue": "A String", # Contains value if the data is of string type.
2546 "key": "A String", # The key identifying the display data.
2547 # This is intended to be used as a label for the display data
2548 # when viewed in a dax monitoring system.
2549 "int64Value": "A String", # Contains value if the data is of int64 type.
2550 },
2551 ],
2552 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2553 { # Description of the composing transforms, names/ids, and input/outputs of a
2554 # stage of execution. Some composing transforms and sources may have been
2555 # generated by the Dataflow service during execution planning.
2556 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2557 { # Description of an interstitial value between transforms in an execution
2558 # stage.
2559 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2560 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2561 # source is most closely associated.
2562 "name": "A String", # Dataflow service generated name for this source.
2563 },
2564 ],
2565 "kind": "A String", # Type of tranform this stage is executing.
2566 "name": "A String", # Dataflow service generated name for this stage.
2567 "outputSource": [ # Output sources for this stage.
2568 { # Description of an input or output of an execution stage.
2569 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2570 "sizeBytes": "A String", # Size of the source, if measurable.
2571 "name": "A String", # Dataflow service generated name for this source.
2572 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2573 # source is most closely associated.
2574 },
2575 ],
2576 "inputSource": [ # Input sources for this stage.
2577 { # Description of an input or output of an execution stage.
2578 "userName": "A String", # Human-readable name for this source; may be user or system generated.
2579 "sizeBytes": "A String", # Size of the source, if measurable.
2580 "name": "A String", # Dataflow service generated name for this source.
2581 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2582 # source is most closely associated.
2583 },
2584 ],
2585 "componentTransform": [ # Transforms that comprise this execution stage.
2586 { # Description of a transform executed as part of an execution stage.
2587 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2588 "originalTransform": "A String", # User name for the original user transform with which this transform is
2589 # most closely associated.
2590 "name": "A String", # Dataflow service generated name for this source.
2591 },
2592 ],
2593 "id": "A String", # Dataflow service generated id for this stage.
2594 },
2595 ],
2596 },
2597 "steps": [ # The top-level steps that constitute the entire job.
2598 { # Defines a particular step within a Cloud Dataflow job.
2599 #
2600 # A job consists of multiple steps, each of which performs some
2601 # specific operation as part of the overall job. Data is typically
2602 # passed from one step to another as part of the job.
2603 #
2604 # Here's an example of a sequence of steps which together implement a
2605 # Map-Reduce job:
2606 #
2607 # * Read a collection of data from some source, parsing the
2608 # collection's elements.
2609 #
2610 # * Validate the elements.
2611 #
2612 # * Apply a user-defined function to map each element to some value
2613 # and extract an element-specific key value.
2614 #
2615 # * Group elements with the same key into a single element with
2616 # that key, transforming a multiply-keyed collection into a
2617 # uniquely-keyed collection.
2618 #
2619 # * Write the elements out to some data sink.
2620 #
2621 # Note that the Cloud Dataflow service may be used to run many different
2622 # types of jobs, not just Map-Reduce.
2623 "kind": "A String", # The kind of step in the Cloud Dataflow job.
2624 "name": "A String", # The name that identifies the step. This must be unique for each
2625 # step with respect to all other steps in the Cloud Dataflow job.
2626 "properties": { # Named properties associated with the step. Each kind of
2627 # predefined step has its own required set of properties.
2628 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
2629 "a_key": "", # Properties of the object.
2630 },
2631 },
2632 ],
2633 "location": "A String", # The location that contains this job.
2634 "tempFiles": [ # A set of files the system should be aware of that are used
2635 # for temporary storage. These temporary files will be
2636 # removed on job completion.
2637 # No duplicates are allowed.
2638 # No file patterns are supported.
2639 #
2640 # The supported files are:
2641 #
2642 # Google Cloud Storage:
2643 #
2644 # storage.googleapis.com/{bucket}/{object}
2645 # bucket.storage.googleapis.com/{object}
2646 "A String",
2647 ],
2648 "type": "A String", # The type of Cloud Dataflow job.
2649 "id": "A String", # The unique ID of this job.
2650 #
2651 # This field is set by the Cloud Dataflow service when the Job is
2652 # created, and is immutable for the life of the job.
2653 "currentState": "A String", # The current state of the job.
2654 #
2655 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2656 # specified.
2657 #
2658 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2659 # terminal state. After a job has reached a terminal state, no
2660 # further state updates may be made.
2661 #
2662 # This field may be mutated by the Cloud Dataflow service;
2663 # callers cannot mutate it.
2664 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2665 # isn't contained in the submitted job.
2666 "stages": { # A mapping from each stage to the information about that stage.
2667 "a_key": { # Contains information about how a particular
2668 # google.dataflow.v1beta3.Step will be executed.
2669 "stepName": [ # The steps associated with the execution stage.
2670 # Note that stages may have several steps, and that a given step
2671 # might be run by more than one stage.
2672 "A String",
2673 ],
2674 },
2675 },
2676 },
2677 }
2678
2679  location: string, The location that contains this job.
2680  x__xgafv: string, V1 error format.
2681    Allowed values
2682 1 - v1 error format
2683 2 - v2 error format
2684
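Example: an illustrative sketch (not part of the generated reference) of using
update() to request cancellation of a running job. The service object, project
ID, and job ID are assumed placeholders:

  from googleapiclient.discovery import build

  dataflow = build('dataflow', 'v1b3')

  # Per the requestedState documentation above, JOB_STATE_CANCELLED
  # irrevocably terminates the job if it has not already reached a
  # terminal state; fields left out of the body are not modified.
  body = {'requestedState': 'JOB_STATE_CANCELLED'}
  job = dataflow.projects().jobs().update(
      projectId='my-project', jobId='my-job-id', body=body).execute()
  print(job.get('currentState'))
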
2685Returns:
2686 An object of the form:
2687
2688    { # Defines a job to be run by the Cloud Dataflow service.
2689 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2690 # If this field is set, the service will ensure its uniqueness.
2691 # The request to create a job will fail if the service has knowledge of a
2692 # previously submitted job with the same client's ID and job name.
2693 # The caller may use this field to ensure idempotence of job
2694 # creation across retried attempts to create a job.
2695 # By default, the field is empty and, in that case, the service ignores it.
2696 "requestedState": "A String", # The job's requested state.
2697 #
2698 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2699 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
2700 # also be used to directly set a job's requested state to
2701 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2702 # job if it has not already reached a terminal state.
2703 "name": "A String", # The user-specified Cloud Dataflow job name.
2704 #
2705 # Only one Job with a given name may exist in a project at any
2706 # given time. If a caller attempts to create a Job with the same
2707 # name as an already-existing Job, the attempt returns the
2708 # existing Job.
2709 #
2710 # The name must match the regular expression
2711 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2712 "currentStateTime": "A String", # The timestamp associated with the current state.
2713 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2714 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2715 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2716 "labels": { # User-defined labels for this job.
2717 #
2718 # The labels map can contain no more than 64 entries. Entries of the labels
2719 # map are UTF8 strings that comply with the following restrictions:
2720 #
2721 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
2722 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2723 # * Both keys and values are additionally constrained to be <= 128 bytes in
2724 # size.
2725 "a_key": "A String",
2726 },
2727 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2728 # corresponding name prefixes of the new job.
2729 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +00002730 },
2731 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2732 # Cloud Dataflow service.
2733 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2734 "version": { # A structure describing which components and their versions of the service
2735 # are required in order to run the job.
2736 "a_key": "", # Properties of the object.
2737 },
2738 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2739 # storage. The system will append the suffix "/temp-{JOBNAME}" to
2740 # this resource prefix, where {JOBNAME} is the value of the
2741 # job_name field. The resulting bucket and object prefix is used
2742 # as the prefix of the resources used to store temporary data
2743 # needed during the job execution. NOTE: This will override the
2744 # value in taskrunner_settings.
2745 # The supported resource type is:
2746 #
2747 # Google Cloud Storage:
2748 #
2749 # storage.googleapis.com/{bucket}/{object}
2750 # bucket.storage.googleapis.com/{object}
2751 "internalExperiments": { # Experimental settings.
2752 "a_key": "", # Properties of the object. Contains field @type with type URL.
2753 },
2754 "dataset": "A String", # The dataset for the current project where various workflow
2755 # related tables are stored.
2756 #
2757 # The supported resource type is:
2758 #
2759 # Google BigQuery:
2760 # bigquery.googleapis.com/{dataset}
2761 "experiments": [ # The list of experiments to enable.
2762 "A String",
2763 ],
2764 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2765 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2766 # options are passed through the service and are used to recreate the
2767 # SDK pipeline options on the worker in a language agnostic and platform
2768 # independent way.
2769 "a_key": "", # Properties of the object.
2770 },
2771 "userAgent": { # A description of the process that generated the request.
2772 "a_key": "", # Properties of the object.
2773 },
2774 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2775 # unspecified, the service will attempt to choose a reasonable
2776 # default. This should be in the form of the API service name,
2777 # e.g. "compute.googleapis.com".
2778 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2779 # specified in order for the job to have workers.
2780 { # Describes one particular pool of Cloud Dataflow workers to be
2781 # instantiated by the Cloud Dataflow service in order to perform the
2782 # computations required by a job. Note that a workflow job may use
2783 # multiple pools, in order to match the various computational
2784 # requirements of the various stages of the job.
2785 "diskSourceImage": "A String", # Fully qualified source image for disks.
2786 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2787 # using the standard Dataflow task runner. Users should ignore
2788 # this field.
2789 "workflowFileName": "A String", # The file to store the workflow in.
2790 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
2791 # will not be uploaded.
2792 #
2793 # The supported resource type is:
2794 #
2795 # Google Cloud Storage:
2796 # storage.googleapis.com/{bucket}/{object}
2797 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002798 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2799 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -07002800 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2801 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2802 "vmId": "A String", # The ID string of the VM.
2803 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2804 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002805 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2806 # access the Cloud Dataflow API.
2807 "A String",
2808 ],
              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                    # "shuffle/v1beta1".
                "workerId": "A String", # The ID of the worker running this pipeline.
                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs. If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs. The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/".
                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                    # "dataflow/v1b3/projects".
2825 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2826 # storage.
2827 #
2828 # The supported resource type is:
2829 #
2830 # Google Cloud Storage:
2831 #
2832 # storage.googleapis.com/{bucket}/{object}
2833 # bucket.storage.googleapis.com/{object}
2834 },
              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "wheel".
              "languageHint": "A String", # The suggested backend language.
              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                  # console.
              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
              "logDir": "A String", # The directory on the VM to store logs.
              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/".
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run. If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard". If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
2913 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2914 # Compute Engine API.
2915 "ipConfiguration": "A String", # Configuration for VM IPs.
2916 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2917 # service will choose a number of threads (according to the number of cores
2918 # on the selected machine type for batch, or 1 by convention for streaming).
2919 "poolArgs": { # Extra arguments for this worker pool.
2920 "a_key": "", # Properties of the object. Contains field @type with type URL.
2921 },
2922 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2923 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002924 # attempt to choose a reasonable default.
2925 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2926 # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    #   storage.googleapis.com/{bucket}
                    #   bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
2948 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2949 "algorithm": "A String", # The algorithm to use for autoscaling.
2950 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2951 },
2952 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
2953 # select a default set of packages which are useful to worker
2954 # harnesses written in a particular language.
2955 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
2956 # attempt to choose a reasonable default.
2957 "metadata": { # Metadata to set on the Google Compute Engine VMs.
2958 "a_key": "A String",
2959 },
Takashi Matsuo06694102015-09-11 13:55:40 -07002960 },
2961 ],
      },
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its
          # executed form. This data is provided by the Dataflow service for ease of
          # visualizing the pipeline and interpreting Dataflow-provided metrics.
          #
          # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by the Cloud Dataflow service. Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2975 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2976 { # Description of the type, names/ids, and input/outputs for a transform.
2977 "kind": "A String", # Type of transform.
2978 "name": "A String", # User provided name for this transform instance.
2979 "inputCollectionName": [ # User names for all collection inputs to this transform.
2980 "A String",
2981 ],
2982 "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example, a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (e.g. a Python module) that defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
              },
            ],
3008 "outputCollectionName": [ # User names for all collection outputs to this transform.
3009 "A String",
3010 ],
3011 "id": "A String", # SDK generated id of this transform instance.
3012 },
3013 ],
3014 "displayData": [ # Pipeline level display data.
3015 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003016 "shortStrValue": "A String", # A possible additional shorter value to display.
3017 # For example a java_class_name_value of com.mypackage.MyDoFn
3018 # will be stored with MyDoFn as the short_str_value and
3019 # com.mypackage.MyDoFn as the java_class_name value.
3020 # short_str_value can be displayed and java_class_name_value
3021 # will be displayed as a tooltip.
3022 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07003023 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003024 "url": "A String", # An optional full URL.
3025 "floatValue": 3.14, # Contains value if the data is of float type.
3026 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3027 # language namespace (i.e. python module) which defines the display data.
3028 # This allows a dax monitoring system to specially handle the data
3029 # and perform custom rendering.
3030 "javaClassValue": "A String", # Contains value if the data is of java class type.
3031 "label": "A String", # An optional label to display in a dax UI for the element.
3032 "boolValue": True or False, # Contains value if the data is of a boolean type.
3033 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07003034 "key": "A String", # The key identifying the display data.
3035 # This is intended to be used as a label for the display data
3036 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003037 "int64Value": "A String", # Contains value if the data is of int64 type.
3038 },
3039 ],
3040 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3041 { # Description of the composing transforms, names/ids, and input/outputs of a
3042 # stage of execution. Some composing transforms and sources may have been
3043 # generated by the Dataflow service during execution planning.
3044 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3045 { # Description of an interstitial value between transforms in an execution
3046 # stage.
3047 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3048 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3049 # source is most closely associated.
3050 "name": "A String", # Dataflow service generated name for this source.
3051 },
3052 ],
3053 "kind": "A String", # Type of tranform this stage is executing.
3054 "name": "A String", # Dataflow service generated name for this stage.
3055 "outputSource": [ # Output sources for this stage.
3056 { # Description of an input or output of an execution stage.
3057 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07003058 "sizeBytes": "A String", # Size of the source, if measurable.
3059 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003060 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3061 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003062 },
3063 ],
3064 "inputSource": [ # Input sources for this stage.
3065 { # Description of an input or output of an execution stage.
3066 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -07003067 "sizeBytes": "A String", # Size of the source, if measurable.
3068 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003069 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3070 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003071 },
3072 ],
3073 "componentTransform": [ # Transforms that comprise this execution stage.
3074 { # Description of a transform executed as part of an execution stage.
3075 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3076 "originalTransform": "A String", # User name for the original user transform with which this transform is
3077 # most closely associated.
3078 "name": "A String", # Dataflow service generated name for this source.
3079 },
3080 ],
3081 "id": "A String", # Dataflow service generated id for this stage.
3082 },
3083 ],
3084 },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job. Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            #   * Read a collection of data from some source, parsing the
            #     collection's elements.
            #
            #   * Validate the elements.
            #
            #   * Apply a user-defined function to map each element to some value
            #     and extract an element-specific key value.
            #
            #   * Group elements with the same key into a single element with
            #     that key, transforming a multiply-keyed collection into a
            #     uniquely-keyed collection.
            #
            #   * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
        },
      ],
      "location": "A String", # The location that contains this job.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
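          #
          # For example: "storage.googleapis.com/my-bucket/tmp/scratch.txt"
          # (hypothetical bucket and object names).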
        "A String",
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "id": "A String", # The unique ID of this job.
          #
          # This field is set by the Cloud Dataflow service when the Job is
          # created, and is immutable for the life of the job.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
          # will be executed that isn't contained in the submitted job.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    }</pre>
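<p>For orientation, here is a minimal sketch (not part of the generated
reference above) of retrieving a job and reading a few of the fields documented
in this object via the Python client. It assumes application-default
credentials are configured; the project ID and job ID shown are hypothetical
placeholders.</p>
<pre>
# A minimal sketch, assuming application-default credentials and an
# installed google-api-python-client. The project and job IDs below are
# hypothetical placeholders.
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
job = service.projects().jobs().get(
    projectId='my-project',          # hypothetical project ID
    jobId='2017-01-01_00_00_00-42',  # hypothetical job ID
).execute()

# The response is a dict with the shape documented above.
print(job.get('currentState'))
for pool in job.get('environment', {}).get('workerPools', []):
    print(pool.get('kind'), pool.get('machineType'), pool.get('numWorkers'))
</pre>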
</div>

</body></html>