Blame - docs/dyn/dataflow_v1b3.projects.jobs.html - platform/external/python/google-api-python-client

2016-08-16 12:44:29 -0700

[diff] [blame]

78

<code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>

79

</p>

80

<p class="firstline">Returns the debug Resource.</p>

81

82

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

83

<code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>

84

</p>

85

<p class="firstline">Returns the messages Resource.</p>

86

87

88

<code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>

89

</p>

90

<p class="firstline">Returns the workItems Resource.</p>

91

92

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

93

<code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

94

<p class="firstline">Creates a Cloud Dataflow job.</p>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

95

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

96

<code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

97

<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

98

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

99

<code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

100

<p class="firstline">Request the job status.</p>

101

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

102

<code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

103

<p class="firstline">List the jobs of a project.</p>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

104

105

<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>

106

<p class="firstline">Retrieves the next page of results.</p>

107

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

108

<code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

109

<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

110

<h3>Method Details</h3>

111

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

112

<code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

113

<pre>Creates a Cloud Dataflow job.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

114

115

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

116

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

117

body: object, The request body. (required)

118

The object takes the form of:

119

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

120

{ # Defines a job to be run by the Cloud Dataflow service.

121

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

122

# If this field is set, the service will ensure its uniqueness.

123

# The request to create a job will fail if the service has knowledge of a

124

# previously submitted job with the same client's ID and job name.

125

# The caller may use this field to ensure idempotence of job

126

# creation across retried attempts to create a job.

127

# By default, the field is empty and, in that case, the service ignores it.

128

"requestedState": "A String", # The job's requested state.

129

#

130

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

131

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

132

# also be used to directly set a job's requested state to

133

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

134

# job if it has not already reached a terminal state.

135

"name": "A String", # The user-specified Cloud Dataflow job name.

136

#

137

# Only one Job with a given name may exist in a project at any

138

# given time. If a caller attempts to create a Job with the same

139

# name as an already-existing Job, the attempt returns the

140

# existing Job.

141

#

142

# The name must match the regular expression

143

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

144

"currentStateTime": "A String", # The timestamp associated with the current state.

145

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

146

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

147

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

148

"labels": { # User-defined labels for this job.

149

#

150

# The labels map can contain no more than 64 entries. Entries of the labels

151

# map are UTF8 strings that comply with the following restrictions:

152

#

153

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

154

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

155

# * Both keys and values are additionally constrained to be <= 128 bytes in

156

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

157

"a_key": "A String",

158

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

159

"location": "A String", # The location that contains this job.

160

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

161

# Cloud Dataflow service.

162

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

163

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

164

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

165

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

166

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

167

"version": { # A structure describing which components and their versions of the service

168

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

169

"a_key": "", # Properties of the object.

170

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

171

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

172

# storage. The system will append the suffix "/temp-{JOBNAME}" to

173

# this resource prefix, where {JOBNAME} is the value of the

174

# job_name field. The resulting bucket and object prefix is used

175

# as the prefix of the resources used to store temporary data

176

# needed during the job execution. NOTE: This will override the

177

# value in taskrunner_settings.

178

# The supported resource type is:

179

#

180

# Google Cloud Storage:

181

#

182

# storage.googleapis.com/{bucket}/{object}

183

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

184

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

185

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

186

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

187

"dataset": "A String", # The dataset for the current project where various workflow

188

# related tables are stored.

189

#

190

# The supported resource type is:

191

#

192

# Google BigQuery:

193

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

194

"experiments": [ # The list of experiments to enable.

195

"A String",

196

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

197

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

198

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

199

# options are passed through the service and are used to recreate the

200

# SDK pipeline options on the worker in a language agnostic and platform

201

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

202

"a_key": "", # Properties of the object.

203

},

204

"userAgent": { # A description of the process that generated the request.

205

"a_key": "", # Properties of the object.

206

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

207

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

208

# unspecified, the service will attempt to choose a reasonable

209

# default. This should be in the form of the API service name,

210

# e.g. "compute.googleapis.com".

211

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

212

# specified in order for the job to have workers.

213

{ # Describes one particular pool of Cloud Dataflow workers to be

214

# instantiated by the Cloud Dataflow service in order to perform the

215

# computations required by a job. Note that a workflow job may use

216

# multiple pools, in order to match the various computational

217

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

218

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

219

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

220

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

221

# are supported.

222

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

223

# service will attempt to choose a reasonable default.

224

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

225

# the service will use the network "default".

226

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

227

# will attempt to choose a reasonable default.

228

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

229

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

230

"metadata": { # Metadata to set on the Google Compute Engine VMs.

231

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

232

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

233

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

234

# Compute Engine API.

235

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

236

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

237

# `TEARDOWN_NEVER`.

238

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

239

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

240

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

241

# down.

242

#

243

# If the workers are not torn down by the service, they will

244

# continue to run and use Google Compute Engine VM resources in the

245

# user's project until they are explicitly terminated by the user.

246

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

247

# policy except for small, manually supervised test jobs.

248

#

249

# If unknown or unspecified, the service will attempt to choose a reasonable

250

# default.

251

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

252

# service will choose a number of threads (according to the number of cores

253

# on the selected machine type for batch, or 1 by convention for streaming).

254

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

255

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

256

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

257

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

258

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

259

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

260

# execute the job. If zero or unspecified, the service will

261

# attempt to choose a reasonable default.

262

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

263

# using the standard Dataflow task runner. Users should ignore

264

# this field.

265

"workflowFileName": "A String", # The file to store the workflow in.

266

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

267

# will not be uploaded.

268

#

269

# The supported resource type is:

270

#

271

# Google Cloud Storage:

272

# storage.googleapis.com/{bucket}/{object}

273

# bucket.storage.googleapis.com/{object}

274

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

275

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

276

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

277

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

278

"vmId": "A String", # The ID string of the VM.

279

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

280

# taskrunner; e.g. "wheel".

281

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

282

# taskrunner; e.g. "root".

283

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

284

# access the Cloud Dataflow API.

285

"A String",

286

],

287

"languageHint": "A String", # The suggested backend language.

288

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

289

# console.

290

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

291

"logDir": "A String", # The directory on the VM to store logs.

292

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

293

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

294

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

295

# "shuffle/v1beta1".

296

"workerId": "A String", # The ID of the worker running this pipeline.

297

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

298

#

299

# When workers access Google Cloud APIs, they logically do so via

300

# relative URLs. If this field is specified, it supplies the base

301

# URL to use for resolving these relative URLs. The normative

302

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

303

# Locators".

304

#

305

# If not specified, the default value is "http://www.googleapis.com/"

306

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

307

# "dataflow/v1b3/projects".

308

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

309

# storage.

310

#

311

# The supported resource type is:

312

#

313

# Google Cloud Storage:

314

#

315

# storage.googleapis.com/{bucket}/{object}

316

# bucket.storage.googleapis.com/{object}

317

},

318

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

319

"harnessCommand": "A String", # The command to launch the worker harness.

320

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

321

# temporary storage.

322

#

323

# The supported resource type is:

324

#

325

# Google Cloud Storage:

326

# storage.googleapis.com/{bucket}/{object}

327

# bucket.storage.googleapis.com/{object}

328

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

329

#

330

# When workers access Google Cloud APIs, they logically do so via

331

# relative URLs. If this field is specified, it supplies the base

332

# URL to use for resolving these relative URLs. The normative

333

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

334

# Locators".

335

#

336

# If not specified, the default value is "http://www.googleapis.com/"

337

},

338

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

339

# select a default set of packages which are useful to worker

340

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

341

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

342

{ # The packages that must be installed in order for a worker to run the

343

# steps of the Cloud Dataflow job that will be assigned to its worker

344

# pool.

345

#

346

# This is the mechanism by which the Cloud Dataflow SDK causes code to

347

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

348

# might use this to install jars containing the user's code and all of the

349

# various dependencies (libraries, data files, etc.) required in order

350

# for that code to run.

351

"location": "A String", # The resource to read the package from. The supported resource type is:

352

#

353

# Google Cloud Storage:

354

#

355

# storage.googleapis.com/{bucket}

356

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

357

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

358

},

359

],

360

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

361

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

362

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

363

},

364

"dataDisks": [ # Data disks that are used by a VM in this workflow.

365

{ # Describes the data disk used by a workflow job.

366

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

367

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

368

# attempt to choose a reasonable default.

369

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

370

# must be a disk type appropriate to the project and zone in which

371

# the workers will run. If unknown or unspecified, the service

372

# will attempt to choose a reasonable default.

373

#

374

# For example, the standard persistent disk type is a resource name

375

# typically ending in "pd-standard". If SSD persistent disks are

376

# available, the resource name typically ends with "pd-ssd". The

377

# actual valid values are defined by the Google Compute Engine API,

378

# not by the Cloud Dataflow API; consult the Google Compute Engine

379

# documentation for more information about determining the set of

380

# available disk types for a particular project and zone.

381

#

382

# Google Compute Engine Disk types are local to a particular

383

# project in a particular zone, and so the resource name will

384

# typically look something like this:

385

#

386

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

387

},

388

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

389

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

390

# attempt to choose a reasonable default.

391

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

392

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

393

},

394

],

395

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

396

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

397

# A description of the user pipeline and stages through which it is executed.

398

# Created by Cloud Dataflow service. Only retrieved with

399

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

400

# form. This data is provided by the Dataflow service for ease of visualizing

401

# the pipeline and interpreting Dataflow provided metrics.

402

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

403

{ # Description of the type, names/ids, and input/outputs for a transform.

404

"kind": "A String", # Type of transform.

405

"name": "A String", # User provided name for this transform instance.

406

"inputCollectionName": [ # User names for all collection inputs to this transform.

407

"A String",

408

],

409

"displayData": [ # Transform-specific display data.

410

{ # Data provided with a pipeline or transform to provide descriptive info.

411

"key": "A String", # The key identifying the display data.

412

# This is intended to be used as a label for the display data

413

# when viewed in a dax monitoring system.

414

"shortStrValue": "A String", # A possible additional shorter value to display.

415

# For example a java_class_name_value of com.mypackage.MyDoFn

416

# will be stored with MyDoFn as the short_str_value and

417

# com.mypackage.MyDoFn as the java_class_name value.

418

# short_str_value can be displayed and java_class_name_value

419

# will be displayed as a tooltip.

420

"timestampValue": "A String", # Contains value if the data is of timestamp type.

421

"url": "A String", # An optional full URL.

422

"floatValue": 3.14, # Contains value if the data is of float type.

423

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

424

# language namespace (e.g. a Python module) which defines the display data.

425

# This allows a dax monitoring system to specially handle the data

426

# and perform custom rendering.

427

"javaClassValue": "A String", # Contains value if the data is of java class type.

428

"label": "A String", # An optional label to display in a dax UI for the element.

429

"boolValue": True or False, # Contains value if the data is of a boolean type.

430

"strValue": "A String", # Contains value if the data is of string type.

431

"durationValue": "A String", # Contains value if the data is of duration type.

432

"int64Value": "A String", # Contains value if the data is of int64 type.

433

},

434

],

435

"outputCollectionName": [ # User names for all collection outputs to this transform.

436

"A String",

437

],

438

"id": "A String", # SDK generated id of this transform instance.

439

},

440

],

441

"displayData": [ # Pipeline level display data.

442

{ # Data provided with a pipeline or transform to provide descriptive info.

443

"key": "A String", # The key identifying the display data.

444

# This is intended to be used as a label for the display data

445

# when viewed in a dax monitoring system.

446

"shortStrValue": "A String", # A possible additional shorter value to display.

447

# For example a java_class_name_value of com.mypackage.MyDoFn

448

# will be stored with MyDoFn as the short_str_value and

449

# com.mypackage.MyDoFn as the java_class_name value.

450

# short_str_value can be displayed and java_class_name_value

451

# will be displayed as a tooltip.

452

"timestampValue": "A String", # Contains value if the data is of timestamp type.

453

"url": "A String", # An optional full URL.

454

"floatValue": 3.14, # Contains value if the data is of float type.

455

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

456

# language namespace (e.g. a Python module) which defines the display data.

457

# This allows a dax monitoring system to specially handle the data

458

# and perform custom rendering.

459

"javaClassValue": "A String", # Contains value if the data is of java class type.

460

"label": "A String", # An optional label to display in a dax UI for the element.

461

"boolValue": True or False, # Contains value if the data is of a boolean type.

462

"strValue": "A String", # Contains value if the data is of string type.

463

"durationValue": "A String", # Contains value if the data is of duration type.

464

"int64Value": "A String", # Contains value if the data is of int64 type.

465

},

466

],

467

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

468

{ # Description of the composing transforms, names/ids, and input/outputs of a

469

# stage of execution. Some composing transforms and sources may have been

470

# generated by the Dataflow service during execution planning.

471

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

472

{ # Description of an interstitial value between transforms in an execution

473

# stage.

474

"userName": "A String", # Human-readable name for this source; may be user or system generated.

475

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

476

# source is most closely associated.

477

"name": "A String", # Dataflow service generated name for this source.

478

},

479

],

480

"kind": "A String", # Type of transform this stage is executing.

481

"name": "A String", # Dataflow service generated name for this stage.

482

"outputSource": [ # Output sources for this stage.

483

{ # Description of an input or output of an execution stage.

484

"userName": "A String", # Human-readable name for this source; may be user or system generated.

485

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

486

# source is most closely associated.

487

"name": "A String", # Dataflow service generated name for this source.

488

"sizeBytes": "A String", # Size of the source, if measurable.

489

},

490

],

491

"inputSource": [ # Input sources for this stage.

492

{ # Description of an input or output of an execution stage.

493

"userName": "A String", # Human-readable name for this source; may be user or system generated.

494

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

495

# source is most closely associated.

496

"name": "A String", # Dataflow service generated name for this source.

497

"sizeBytes": "A String", # Size of the source, if measurable.

498

},

499

],

500

"componentTransform": [ # Transforms that comprise this execution stage.

501

{ # Description of a transform executed as part of an execution stage.

502

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

503

"originalTransform": "A String", # User name for the original user transform with which this transform is

504

# most closely associated.

505

"name": "A String", # Dataflow service generated name for this transform.

506

},

507

],

508

"id": "A String", # Dataflow service generated id for this stage.

509

},

510

],

511

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

512

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

513

{ # Defines a particular step within a Cloud Dataflow job.

514

#

515

# A job consists of multiple steps, each of which performs some

516

# specific operation as part of the overall job. Data is typically

517

# passed from one step to another as part of the job.

518

#

519

# Here's an example of a sequence of steps which together implement a

520

# Map-Reduce job:

521

#

522

# * Read a collection of data from some source, parsing the

523

# collection's elements.

524

#

525

# * Validate the elements.

526

#

527

# * Apply a user-defined function to map each element to some value

528

# and extract an element-specific key value.

529

#

530

# * Group elements with the same key into a single element with

531

# that key, transforming a multiply-keyed collection into a

532

# uniquely-keyed collection.

533

#

534

# * Write the elements out to some data sink.

535

#

536

# Note that the Cloud Dataflow service may be used to run many different

537

# types of jobs, not just Map-Reduce.

538

"kind": "A String", # The kind of step in the Cloud Dataflow job.

539

"properties": { # Named properties associated with the step. Each kind of

540

# predefined step has its own required set of properties.

541

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

542

"a_key": "", # Properties of the object.

543

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

544

"name": "A String", # The name that identifies the step. This must be unique for each

545

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

546

},

547

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

548

"currentState": "A String", # The current state of the job.

549

#

550

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

551

# specified.

552

#

553

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

554

# terminal state. After a job has reached a terminal state, no

555

# further state updates may be made.

556

#

557

# This field may be mutated by the Cloud Dataflow service;

558

# callers cannot mutate it.

559

"tempFiles": [ # A set of files the system should be aware of that are used

560

# for temporary storage. These temporary files will be

561

# removed on job completion.

562

# No duplicates are allowed.

563

# No file patterns are supported.

564

#

565

# The supported files are:

566

#

567

# Google Cloud Storage:

568

#

569

# storage.googleapis.com/{bucket}/{object}

570

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

571

"A String",

572

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

573

"type": "A String", # The type of Cloud Dataflow job.

574

"id": "A String", # The unique ID of this job.

575

#

576

# This field is set by the Cloud Dataflow service when the Job is

577

# created, and is immutable for the life of the job.

578

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

579

# of the job it replaced.

580

#

581

# When sending a `CreateJobRequest`, you can update a job by specifying it

582

# here. The job named here is stopped, and its intermediate state is

583

# transferred to this job.

584

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

585

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

586

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

587

"a_key": { # Contains information about how a particular

588

# google.dataflow.v1beta3.Step will be executed.

589

"stepName": [ # The steps associated with the execution stage.

590

# Note that stages may have several steps, and that a given step

591

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

592

"A String",

593

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

594

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

595

},

596

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

597

}

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

598

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

599

location: string, The location that contains this job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

600

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

replaceJobId: string, Deprecated. This field is now in the Job message.

605

view: string, The level of information requested in response.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

606

607

Returns:

608

An object of the form:

609

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

610

{ # Defines a job to be run by the Cloud Dataflow service.

611

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

612

# If this field is set, the service will ensure its uniqueness.

613

# The request to create a job will fail if the service has knowledge of a

614

# previously submitted job with the same client's ID and job name.

615

# The caller may use this field to ensure idempotence of job

616

# creation across retried attempts to create a job.

617

# By default, the field is empty and, in that case, the service ignores it.

618

"requestedState": "A String", # The job's requested state.

619

#

620

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

621

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

622

# also be used to directly set a job's requested state to

623

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

624

# job if it has not already reached a terminal state.

625

"name": "A String", # The user-specified Cloud Dataflow job name.

626

#

627

# Only one Job with a given name may exist in a project at any

628

# given time. If a caller attempts to create a Job with the same

629

# name as an already-existing Job, the attempt returns the

630

# existing Job.

631

#

632

# The name must match the regular expression

633

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

634

"currentStateTime": "A String", # The timestamp associated with the current state.

635

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

636

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

637

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

638

"labels": { # User-defined labels for this job.

639

#

640

# The labels map can contain no more than 64 entries. Entries of the labels

641

# map are UTF8 strings that comply with the following restrictions:

642

#

643

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

644

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

645

# * Both keys and values are additionally constrained to be <= 128 bytes in

646

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

647

"a_key": "A String",

648

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

649

"location": "A String", # The location that contains this job.

650

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

651

# Cloud Dataflow service.

652

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

653

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

654

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

655

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

656

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

657

"version": { # A structure describing which components and their versions of the service

658

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

659

"a_key": "", # Properties of the object.

660

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

661

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

662

# storage. The system will append the suffix "/temp-{JOBNAME}" to

663

# this resource prefix, where {JOBNAME} is the value of the

664

# job_name field. The resulting bucket and object prefix is used

665

# as the prefix of the resources used to store temporary data

666

# needed during the job execution. NOTE: This will override the

667

# value in taskrunner_settings.

668

# The supported resource type is:

669

#

670

# Google Cloud Storage:

671

#

672

# storage.googleapis.com/{bucket}/{object}

673

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

674

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

675

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

676

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

677

"dataset": "A String", # The dataset for the current project where various workflow

678

# related tables are stored.

679

#

680

# The supported resource type is:

681

#

682

# Google BigQuery:

683

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

684

"experiments": [ # The list of experiments to enable.

685

"A String",

686

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

687

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

688

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

689

# options are passed through the service and are used to recreate the

690

# SDK pipeline options on the worker in a language agnostic and platform

691

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

692

"a_key": "", # Properties of the object.

693

},

694

"userAgent": { # A description of the process that generated the request.

695

"a_key": "", # Properties of the object.

696

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

697

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

698

# unspecified, the service will attempt to choose a reasonable

699

# default. This should be in the form of the API service name,

700

# e.g. "compute.googleapis.com".

701

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

702

# specified in order for the job to have workers.

703

{ # Describes one particular pool of Cloud Dataflow workers to be

704

# instantiated by the Cloud Dataflow service in order to perform the

705

# computations required by a job. Note that a workflow job may use

706

# multiple pools, in order to match the various computational

707

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

708

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

709

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

710

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

711

# are supported.

712

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

713

# service will attempt to choose a reasonable default.

714

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

715

# the service will use the network "default".

716

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

717

# will attempt to choose a reasonable default.

718

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

719

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

720

"metadata": { # Metadata to set on the Google Compute Engine VMs.

721

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

722

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

723

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

724

# Compute Engine API.

725

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

726

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

727

# `TEARDOWN_NEVER`.

728

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

729

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

730

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

731

# down.

732

#

733

# If the workers are not torn down by the service, they will

734

# continue to run and use Google Compute Engine VM resources in the

735

# user's project until they are explicitly terminated by the user.

736

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

737

# policy except for small, manually supervised test jobs.

738

#

739

# If unknown or unspecified, the service will attempt to choose a reasonable

740

# default.

741

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

742

# service will choose a number of threads (according to the number of cores

743

# on the selected machine type for batch, or 1 by convention for streaming).

744

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

745

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

746

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

747

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

748

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

749

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

750

# execute the job. If zero or unspecified, the service will

751

# attempt to choose a reasonable default.

752

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

753

# using the standard Dataflow task runner. Users should ignore

754

# this field.

755

"workflowFileName": "A String", # The file to store the workflow in.

756

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

757

# will not be uploaded.

758

#

759

# The supported resource type is:

760

#

761

# Google Cloud Storage:

762

# storage.googleapis.com/{bucket}/{object}

763

# bucket.storage.googleapis.com/{object}

764

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

765

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

766

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

767

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

768

"vmId": "A String", # The ID string of the VM.

769

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

770

# taskrunner; e.g. "wheel".

771

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

772

# taskrunner; e.g. "root".

773

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

774

# access the Cloud Dataflow API.

775

"A String",

776

],

777

"languageHint": "A String", # The suggested backend language.

778

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

779

# console.

780

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

781

"logDir": "A String", # The directory on the VM to store logs.

782

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

783

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

784

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

785

# "shuffle/v1beta1".

786

"workerId": "A String", # The ID of the worker running this pipeline.

787

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

788

#

789

# When workers access Google Cloud APIs, they logically do so via

790

# relative URLs. If this field is specified, it supplies the base

791

# URL to use for resolving these relative URLs. The normative

792

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

793

# Locators".

794

#

795

# If not specified, the default value is "http://www.googleapis.com/"

796

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

797

# "dataflow/v1b3/projects".

798

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

799

# storage.

800

#

801

# The supported resource type is:

802

#

803

# Google Cloud Storage:

804

#

805

# storage.googleapis.com/{bucket}/{object}

806

# bucket.storage.googleapis.com/{object}

807

},

808

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

809

"harnessCommand": "A String", # The command to launch the worker harness.

810

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

811

# temporary storage.

812

#

813

# The supported resource type is:

814

#

815

# Google Cloud Storage:

816

# storage.googleapis.com/{bucket}/{object}

817

# bucket.storage.googleapis.com/{object}

818

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

819

#

820

# When workers access Google Cloud APIs, they logically do so via

821

# relative URLs. If this field is specified, it supplies the base

822

# URL to use for resolving these relative URLs. The normative

823

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

824

# Locators".

825

#

826

# If not specified, the default value is "http://www.googleapis.com/"

827

},

828

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

829

# select a default set of packages which are useful to worker

830

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

831

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

832

{ # The packages that must be installed in order for a worker to run the

833

# steps of the Cloud Dataflow job that will be assigned to its worker

834

# pool.

835

#

836

# This is the mechanism by which the Cloud Dataflow SDK causes code to

837

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

838

# might use this to install jars containing the user's code and all of the

839

# various dependencies (libraries, data files, etc.) required in order

840

# for that code to run.

841

"location": "A String", # The resource to read the package from. The supported resource type is:

842

#

843

# Google Cloud Storage:

844

#

845

# storage.googleapis.com/{bucket}

846

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

847

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

848

},

849

],

850

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

851

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

852

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

853

},

854

"dataDisks": [ # Data disks that are used by a VM in this workflow.

855

{ # Describes the data disk used by a workflow job.

856

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

857

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

858

# attempt to choose a reasonable default.

859

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

860

# must be a disk type appropriate to the project and zone in which

861

# the workers will run. If unknown or unspecified, the service

862

# will attempt to choose a reasonable default.

863

#

864

# For example, the standard persistent disk type is a resource name

865

# typically ending in "pd-standard". If SSD persistent disks are

866

# available, the resource name typically ends with "pd-ssd". The

867

# actual valid values are defined by the Google Compute Engine API,

868

# not by the Cloud Dataflow API; consult the Google Compute Engine

869

# documentation for more information about determining the set of

870

# available disk types for a particular project and zone.

871

#

872

# Google Compute Engine Disk types are local to a particular

873

# project in a particular zone, and so the resource name will

874

# typically look something like this:

875

#

876

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

877

},

878

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

879

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

880

# attempt to choose a reasonable default.

881

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

882

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

883

},

884

],

885

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

886

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

887

# A description of the user pipeline and stages through which it is executed.

888

# Created by Cloud Dataflow service. Only retrieved with

889

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

890

# form. This data is provided by the Dataflow service for ease of visualizing

891

# the pipeline and interpreting Dataflow-provided metrics.

892

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

893

{ # Description of the type, names/ids, and input/outputs for a transform.

894

"kind": "A String", # Type of transform.

895

"name": "A String", # User provided name for this transform instance.

896

"inputCollectionName": [ # User names for all collection inputs to this transform.

897

"A String",

898

],

899

"displayData": [ # Transform-specific display data.

900

{ # Data provided with a pipeline or transform to provide descriptive info.

901

"key": "A String", # The key identifying the display data.

902

# This is intended to be used as a label for the display data

903

# when viewed in a dax monitoring system.

904

"shortStrValue": "A String", # A possible additional shorter value to display.

905

# For example a java_class_name_value of com.mypackage.MyDoFn

906

# will be stored with MyDoFn as the short_str_value and

907

# com.mypackage.MyDoFn as the java_class_name value.

908

# short_str_value can be displayed and java_class_name_value

909

# will be displayed as a tooltip.

910

"timestampValue": "A String", # Contains value if the data is of timestamp type.

911

"url": "A String", # An optional full URL.

912

"floatValue": 3.14, # Contains value if the data is of float type.

913

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

914

# language namespace (e.g. a Python module) which defines the display data.

915

# This allows a dax monitoring system to specially handle the data

916

# and perform custom rendering.

917

"javaClassValue": "A String", # Contains value if the data is of java class type.

918

"label": "A String", # An optional label to display in a dax UI for the element.

919

"boolValue": True or False, # Contains value if the data is of a boolean type.

920

"strValue": "A String", # Contains value if the data is of string type.

921

"durationValue": "A String", # Contains value if the data is of duration type.

922

"int64Value": "A String", # Contains value if the data is of int64 type.

923

},

924

],

925

"outputCollectionName": [ # User names for all collection outputs to this transform.

926

"A String",

927

],

928

"id": "A String", # SDK generated id of this transform instance.

929

},

930

],

931

"displayData": [ # Pipeline level display data.

932

{ # Data provided with a pipeline or transform to provide descriptive info.

933

"key": "A String", # The key identifying the display data.

934

# This is intended to be used as a label for the display data

935

# when viewed in a dax monitoring system.

936

"shortStrValue": "A String", # A possible additional shorter value to display.

937

# For example a java_class_name_value of com.mypackage.MyDoFn

938

# will be stored with MyDoFn as the short_str_value and

939

# com.mypackage.MyDoFn as the java_class_name value.

940

# short_str_value can be displayed and java_class_name_value

941

# will be displayed as a tooltip.

942

"timestampValue": "A String", # Contains value if the data is of timestamp type.

943

"url": "A String", # An optional full URL.

944

"floatValue": 3.14, # Contains value if the data is of float type.

945

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

946

# language namespace (e.g. a Python module) which defines the display data.

947

# This allows a dax monitoring system to specially handle the data

948

# and perform custom rendering.

949

"javaClassValue": "A String", # Contains value if the data is of java class type.

950

"label": "A String", # An optional label to display in a dax UI for the element.

951

"boolValue": True or False, # Contains value if the data is of a boolean type.

952

"strValue": "A String", # Contains value if the data is of string type.

953

"durationValue": "A String", # Contains value if the data is of duration type.

954

"int64Value": "A String", # Contains value if the data is of int64 type.

955

},

956

],

957

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

958

{ # Description of the composing transforms, names/ids, and input/outputs of a

959

# stage of execution. Some composing transforms and sources may have been

960

# generated by the Dataflow service during execution planning.

961

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

962

{ # Description of an interstitial value between transforms in an execution

963

# stage.

964

"userName": "A String", # Human-readable name for this source; may be user or system generated.

965

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

966

# source is most closely associated.

967

"name": "A String", # Dataflow service generated name for this source.

968

},

969

],

970

"kind": "A String", # Type of transform this stage is executing.

971

"name": "A String", # Dataflow service generated name for this stage.

972

"outputSource": [ # Output sources for this stage.

973

{ # Description of an input or output of an execution stage.

974

"userName": "A String", # Human-readable name for this source; may be user or system generated.

975

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

976

# source is most closely associated.

977

"name": "A String", # Dataflow service generated name for this source.

978

"sizeBytes": "A String", # Size of the source, if measurable.

979

},

980

],

981

"inputSource": [ # Input sources for this stage.

982

{ # Description of an input or output of an execution stage.

983

"userName": "A String", # Human-readable name for this source; may be user or system generated.

984

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

985

# source is most closely associated.

986

"name": "A String", # Dataflow service generated name for this source.

987

"sizeBytes": "A String", # Size of the source, if measurable.

988

},

989

],

990

"componentTransform": [ # Transforms that comprise this execution stage.

991

{ # Description of a transform executed as part of an execution stage.

992

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

993

"originalTransform": "A String", # User name for the original user transform with which this transform is

994

# most closely associated.

995

"name": "A String", # Dataflow service generated name for this transform.

996

},

997

],

998

"id": "A String", # Dataflow service generated id for this stage.

999

},

1000

],

1001

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1002

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1003

{ # Defines a particular step within a Cloud Dataflow job.

1004

#

1005

# A job consists of multiple steps, each of which performs some

1006

# specific operation as part of the overall job. Data is typically

1007

# passed from one step to another as part of the job.

1008

#

1009

# Here's an example of a sequence of steps which together implement a

1010

# Map-Reduce job:

1011

#

1012

# * Read a collection of data from some source, parsing the

1013

# collection's elements.

1014

#

1015

# * Validate the elements.

1016

#

1017

# * Apply a user-defined function to map each element to some value

1018

# and extract an element-specific key value.

1019

#

1020

# * Group elements with the same key into a single element with

1021

# that key, transforming a multiply-keyed collection into a

1022

# uniquely-keyed collection.

1023

#

1024

# * Write the elements out to some data sink.

1025

#

1026

# Note that the Cloud Dataflow service may be used to run many different

1027

# types of jobs, not just Map-Reduce.

1028

"kind": "A String", # The kind of step in the Cloud Dataflow job.

1029

"properties": { # Named properties associated with the step. Each kind of

1030

# predefined step has its own required set of properties.

1031

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1032

"a_key": "", # Properties of the object.

1033

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1034

"name": "A String", # The name that identifies the step. This must be unique for each

1035

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1036

},

1037

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1038

"currentState": "A String", # The current state of the job.

1039

#

1040

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1041

# specified.

1042

#

1043

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1044

# terminal state. After a job has reached a terminal state, no

1045

# further state updates may be made.

1046

#

1047

# This field may be mutated by the Cloud Dataflow service;

1048

# callers cannot mutate it.

1049

"tempFiles": [ # A set of files the system should be aware of that are used

1050

# for temporary storage. These temporary files will be

1051

# removed on job completion.

1052

# No duplicates are allowed.

1053

# No file patterns are supported.

1054

#

1055

# The supported files are:

1056

#

1057

# Google Cloud Storage:

1058

#

1059

# storage.googleapis.com/{bucket}/{object}

1060

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

1061

"A String",

1062

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1063

"type": "A String", # The type of Cloud Dataflow job.

1064

"id": "A String", # The unique ID of this job.

1065

#

1066

# This field is set by the Cloud Dataflow service when the Job is

1067

# created, and is immutable for the life of the job.

1068

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1069

# of the job it replaced.

1070

#

1071

# When sending a `CreateJobRequest`, you can update a job by specifying it

1072

# here. The job named here is stopped, and its intermediate state is

1073

# transferred to this job.

1074

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1075

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1076

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1077

"a_key": { # Contains information about how a particular

1078

# google.dataflow.v1beta3.Step will be executed.

1079

"stepName": [ # The steps associated with the execution stage.

1080

# Note that stages may have several steps, and that a given step

1081

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1082

"A String",

1083

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1084

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1085

},

1086

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1087

}</pre>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

</div>

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1091

<code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1092

<pre>Gets the state of the specified Cloud Dataflow job.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1093

1094

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1095

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

1096

jobId: string, The job ID. (required)

1097

location: string, The location that contains this job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1098

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

view: string, The level of information requested in response.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1103

1104

Returns:

1105

An object of the form:

1106

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1107

{ # Defines a job to be run by the Cloud Dataflow service.

1108

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

1109

# If this field is set, the service will ensure its uniqueness.

1110

# The request to create a job will fail if the service has knowledge of a

1111

# previously submitted job with the same client's ID and job name.

1112

# The caller may use this field to ensure idempotence of job

1113

# creation across retried attempts to create a job.

1114

# By default, the field is empty and, in that case, the service ignores it.

1115

"requestedState": "A String", # The job's requested state.

1116

#

1117

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1118

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1119

# also be used to directly set a job's requested state to

1120

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1121

# job if it has not already reached a terminal state.

1122

"name": "A String", # The user-specified Cloud Dataflow job name.

1123

#

1124

# Only one Job with a given name may exist in a project at any

1125

# given time. If a caller attempts to create a Job with the same

1126

# name as an already-existing Job, the attempt returns the

1127

# existing Job.

1128

#

1129

# The name must match the regular expression

1130

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

1131

"currentStateTime": "A String", # The timestamp associated with the current state.

1132

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1133

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1134

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1135

"labels": { # User-defined labels for this job.

1136

#

1137

# The labels map can contain no more than 64 entries. Entries of the labels

1138

# map are UTF8 strings that comply with the following restrictions:

1139

#

1140

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

1141

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

1142

# * Both keys and values are additionally constrained to be <= 128 bytes in

1143

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1144

"a_key": "A String",

1145

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1146

"location": "A String", # The location that contains this job.

1147

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1148

# Cloud Dataflow service.

1149

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

1150

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1151

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1152

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1153

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

1154

"version": { # A structure describing which components and their versions of the service

1155

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1156

"a_key": "", # Properties of the object.

1157

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1158

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1159

# storage. The system will append the suffix "/temp-{JOBNAME}" to

1160

# this resource prefix, where {JOBNAME} is the value of the

1161

# job_name field. The resulting bucket and object prefix is used

1162

# as the prefix of the resources used to store temporary data

1163

# needed during the job execution. NOTE: This will override the

1164

# value in taskrunner_settings.

1165

# The supported resource type is:

1166

#

1167

# Google Cloud Storage:

1168

#

1169

# storage.googleapis.com/{bucket}/{object}

1170

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1171

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1172

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1173

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1174

"dataset": "A String", # The dataset for the current project where various workflow

1175

# related tables are stored.

1176

#

1177

# The supported resource type is:

1178

#

1179

# Google BigQuery:

1180

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1181

"experiments": [ # The list of experiments to enable.

1182

"A String",

1183

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

1184

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1185

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

1186

# options are passed through the service and are used to recreate the

1187

# SDK pipeline options on the worker in a language agnostic and platform

1188

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1189

"a_key": "", # Properties of the object.

1190

},

1191

"userAgent": { # A description of the process that generated the request.

1192

"a_key": "", # Properties of the object.

1193

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1194

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

1195

# unspecified, the service will attempt to choose a reasonable

1196

# default. This should be in the form of the API service name,

1197

# e.g. "compute.googleapis.com".

1198

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

1199

# specified in order for the job to have workers.

1200

{ # Describes one particular pool of Cloud Dataflow workers to be

1201

# instantiated by the Cloud Dataflow service in order to perform the

1202

# computations required by a job. Note that a workflow job may use

1203

# multiple pools, in order to match the various computational

1204

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1205

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1206

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1207

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1208

# are supported.

1209

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1210

# service will attempt to choose a reasonable default.

1211

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

1212

# the service will use the network "default".

1213

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

1214

# will attempt to choose a reasonable default.

1215

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

1216

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1217

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1218

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1219

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1220

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

1221

# Compute Engine API.

1222

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

1223

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

1224

# `TEARDOWN_NEVER`.

1225

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

1226

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

1227

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

1228

# down.

1229

#

1230

# If the workers are not torn down by the service, they will

1231

# continue to run and use Google Compute Engine VM resources in the

1232

# user's project until they are explicitly terminated by the user.

1233

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

1234

# policy except for small, manually supervised test jobs.

1235

#

1236

# If unknown or unspecified, the service will attempt to choose a reasonable

1237

# default.

1238

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

1239

# service will choose a number of threads (according to the number of cores

1240

# on the selected machine type for batch, or 1 by convention for streaming).

1241

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1242

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1243

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1244

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1245

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1246

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

1247

# execute the job. If zero or unspecified, the service will

1248

# attempt to choose a reasonable default.

1249

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1250

# using the standard Dataflow task runner. Users should ignore

1251

# this field.

1252

"workflowFileName": "A String", # The file to store the workflow in.

1253

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1254

# will not be uploaded.

1255

#

1256

# The supported resource type is:

1257

#

1258

# Google Cloud Storage:

1259

# storage.googleapis.com/{bucket}/{object}

1260

# bucket.storage.googleapis.com/{object}

1261

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

1262

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1263

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1264

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1265

"vmId": "A String", # The ID string of the VM.

1266

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1267

# taskrunner; e.g. "wheel".

1268

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1269

# taskrunner; e.g. "root".

1270

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1271

# access the Cloud Dataflow API.

1272

"A String",

1273

],

1274

"languageHint": "A String", # The suggested backend language.

1275

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1276

# console.

1277

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1278

"logDir": "A String", # The directory on the VM to store logs.

1279

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1280

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1281

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1282

# "shuffle/v1beta1".

1283

"workerId": "A String", # The ID of the worker running this pipeline.

1284

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1285

#

1286

# When workers access Google Cloud APIs, they logically do so via

1287

# relative URLs. If this field is specified, it supplies the base

1288

# URL to use for resolving these relative URLs. The normative

1289

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1290

# Locators".

1291

#

1292

# If not specified, the default value is "http://www.googleapis.com/"

1293

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1294

# "dataflow/v1b3/projects".

1295

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1296

# storage.

1297

#

1298

# The supported resource type is:

1299

#

1300

# Google Cloud Storage:

1301

#

1302

# storage.googleapis.com/{bucket}/{object}

1303

# bucket.storage.googleapis.com/{object}

1304

},

1305

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

1306

"harnessCommand": "A String", # The command to launch the worker harness.

1307

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1308

# temporary storage.

1309

#

1310

# The supported resource type is:

1311

#

1312

# Google Cloud Storage:

1313

# storage.googleapis.com/{bucket}/{object}

1314

# bucket.storage.googleapis.com/{object}

1315

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1316

#

1317

# When workers access Google Cloud APIs, they logically do so via

1318

# relative URLs. If this field is specified, it supplies the base

1319

# URL to use for resolving these relative URLs. The normative

1320

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1321

# Locators".

1322

#

1323

# If not specified, the default value is "http://www.googleapis.com/"

1324

},

1325

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1326

# select a default set of packages which are useful to worker

1327

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1328

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1329

{ # The packages that must be installed in order for a worker to run the

1330

# steps of the Cloud Dataflow job that will be assigned to its worker

1331

# pool.

1332

#

1333

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1334

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1335

# might use this to install jars containing the user's code and all of the

1336

# various dependencies (libraries, data files, etc.) required in order

1337

# for that code to run.

1338

"location": "A String", # The resource to read the package from. The supported resource type is:

1339

#

1340

# Google Cloud Storage:

1341

#

1342

# storage.googleapis.com/{bucket}

1343

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1344

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1345

},

1346

],

1347

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1348

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1349

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1350

},

1351

"dataDisks": [ # Data disks that are used by a VM in this workflow.

1352

{ # Describes the data disk used by a workflow job.

1353

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1354

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

1355

# attempt to choose a reasonable default.

1356

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

1357

# must be a disk type appropriate to the project and zone in which

1358

# the workers will run. If unknown or unspecified, the service

1359

# will attempt to choose a reasonable default.

1360

#

1361

# For example, the standard persistent disk type is a resource name

1362

# typically ending in "pd-standard". If SSD persistent disks are

1363

# available, the resource name typically ends with "pd-ssd". The

1364

# actual valid values are defined by the Google Compute Engine API,

1365

# not by the Cloud Dataflow API; consult the Google Compute Engine

1366

# documentation for more information about determining the set of

1367

# available disk types for a particular project and zone.

1368

#

1369

# Google Compute Engine Disk types are local to a particular

1370

# project in a particular zone, and so the resource name will

1371

# typically look something like this:

1372

#

1373

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1374

},

1375

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1376

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

1377

# attempt to choose a reasonable default.

1378

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1379

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1380

},

1381

],

1382

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1383

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1384

# A description of the user pipeline and stages through which it is executed.

1385

# Created by Cloud Dataflow service. Only retrieved with

1386

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1387

# form. This data is provided by the Dataflow service for ease of visualizing

1388

# the pipeline and interpreting Dataflow provided metrics.

1389

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1390

{ # Description of the type, names/ids, and input/outputs for a transform.

1391

"kind": "A String", # Type of transform.

1392

"name": "A String", # User provided name for this transform instance.

1393

"inputCollectionName": [ # User names for all collection inputs to this transform.

1394

"A String",

1395

],

1396

"displayData": [ # Transform-specific display data.

1397

{ # Data provided with a pipeline or transform to provide descriptive info.

1398

"key": "A String", # The key identifying the display data.

1399

# This is intended to be used as a label for the display data

1400

# when viewed in a dax monitoring system.

1401

"shortStrValue": "A String", # A possible additional shorter value to display.

1402

# For example a java_class_name_value of com.mypackage.MyDoFn

1403

# will be stored with MyDoFn as the short_str_value and

1404

# com.mypackage.MyDoFn as the java_class_name value.

1405

# short_str_value can be displayed and java_class_name_value

1406

# will be displayed as a tooltip.

1407

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1408

"url": "A String", # An optional full URL.

1409

"floatValue": 3.14, # Contains value if the data is of float type.

1410

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1411

# language namespace (i.e. python module) which defines the display data.

1412

# This allows a dax monitoring system to specially handle the data

1413

# and perform custom rendering.

1414

"javaClassValue": "A String", # Contains value if the data is of java class type.

1415

"label": "A String", # An optional label to display in a dax UI for the element.

1416

"boolValue": True or False, # Contains value if the data is of a boolean type.

1417

"strValue": "A String", # Contains value if the data is of string type.

1418

"durationValue": "A String", # Contains value if the data is of duration type.

1419

"int64Value": "A String", # Contains value if the data is of int64 type.

1420

},

1421

],

1422

"outputCollectionName": [ # User names for all collection outputs to this transform.

1423

"A String",

1424

],

1425

"id": "A String", # SDK generated id of this transform instance.

1426

},

1427

],

1428

"displayData": [ # Pipeline level display data.

1429

{ # Data provided with a pipeline or transform to provide descriptive info.

1430

"key": "A String", # The key identifying the display data.

1431

# This is intended to be used as a label for the display data

1432

# when viewed in a dax monitoring system.

1433

"shortStrValue": "A String", # A possible additional shorter value to display.

1434

# For example a java_class_name_value of com.mypackage.MyDoFn

1435

# will be stored with MyDoFn as the short_str_value and

1436

# com.mypackage.MyDoFn as the java_class_name value.

1437

# short_str_value can be displayed and java_class_name_value

1438

# will be displayed as a tooltip.

1439

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1440

"url": "A String", # An optional full URL.

1441

"floatValue": 3.14, # Contains value if the data is of float type.

1442

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1443

# language namespace (i.e. python module) which defines the display data.

1444

# This allows a dax monitoring system to specially handle the data

1445

# and perform custom rendering.

1446

"javaClassValue": "A String", # Contains value if the data is of java class type.

1447

"label": "A String", # An optional label to display in a dax UI for the element.

1448

"boolValue": True or False, # Contains value if the data is of a boolean type.

1449

"strValue": "A String", # Contains value if the data is of string type.

1450

"durationValue": "A String", # Contains value if the data is of duration type.

1451

"int64Value": "A String", # Contains value if the data is of int64 type.

1452

},

1453

],

1454

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

1455

{ # Description of the composing transforms, names/ids, and input/outputs of a

1456

# stage of execution. Some composing transforms and sources may have been

1457

# generated by the Dataflow service during execution planning.

1458

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

1459

{ # Description of an interstitial value between transforms in an execution

1460

# stage.

1461

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1462

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1463

# source is most closely associated.

1464

"name": "A String", # Dataflow service generated name for this source.

1465

},

1466

],

1467

"kind": "A String", # Type of transform this stage is executing.

1468

"name": "A String", # Dataflow service generated name for this stage.

1469

"outputSource": [ # Output sources for this stage.

1470

{ # Description of an input or output of an execution stage.

1471

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1472

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1473

# source is most closely associated.

1474

"name": "A String", # Dataflow service generated name for this source.

1475

"sizeBytes": "A String", # Size of the source, if measurable.

1476

},

1477

],

1478

"inputSource": [ # Input sources for this stage.

1479

{ # Description of an input or output of an execution stage.

1480

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1481

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1482

# source is most closely associated.

1483

"name": "A String", # Dataflow service generated name for this source.

1484

"sizeBytes": "A String", # Size of the source, if measurable.

1485

},

1486

],

1487

"componentTransform": [ # Transforms that comprise this execution stage.

1488

{ # Description of a transform executed as part of an execution stage.

1489

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1490

"originalTransform": "A String", # User name for the original user transform with which this transform is

1491

# most closely associated.

1492

"name": "A String", # Dataflow service generated name for this source.

1493

},

1494

],

1495

"id": "A String", # Dataflow service generated id for this stage.

1496

},

1497

],

1498

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1499

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1500

{ # Defines a particular step within a Cloud Dataflow job.

1501

#

1502

# A job consists of multiple steps, each of which performs some

1503

# specific operation as part of the overall job. Data is typically

1504

# passed from one step to another as part of the job.

1505

#

1506

# Here's an example of a sequence of steps which together implement a

1507

# Map-Reduce job:

1508

#

1509

# * Read a collection of data from some source, parsing the

1510

# collection's elements.

1511

#

1512

# * Validate the elements.

1513

#

1514

# * Apply a user-defined function to map each element to some value

1515

# and extract an element-specific key value.

1516

#

1517

# * Group elements with the same key into a single element with

1518

# that key, transforming a multiply-keyed collection into a

1519

# uniquely-keyed collection.

1520

#

1521

# * Write the elements out to some data sink.

1522

#

1523

# Note that the Cloud Dataflow service may be used to run many different

1524

# types of jobs, not just Map-Reduce.

1525

"kind": "A String", # The kind of step in the Cloud Dataflow job.

1526

"properties": { # Named properties associated with the step. Each kind of

1527

# predefined step has its own required set of properties.

1528

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1529

"a_key": "", # Properties of the object.

1530

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1531

"name": "A String", # The name that identifies the step. This must be unique for each

1532

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1533

},

1534

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1535

"currentState": "A String", # The current state of the job.

1536

#

1537

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1538

# specified.

1539

#

1540

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1541

# terminal state. After a job has reached a terminal state, no

1542

# further state updates may be made.

1543

#

1544

# This field may be mutated by the Cloud Dataflow service;

1545

# callers cannot mutate it.

1546

"tempFiles": [ # A set of files the system should be aware of that are used

1547

# for temporary storage. These temporary files will be

1548

# removed on job completion.

1549

# No duplicates are allowed.

1550

# No file patterns are supported.

1551

#

1552

# The supported files are:

1553

#

1554

# Google Cloud Storage:

1555

#

1556

# storage.googleapis.com/{bucket}/{object}

1557

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

1558

"A String",

1559

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1560

"type": "A String", # The type of Cloud Dataflow job.

1561

"id": "A String", # The unique ID of this job.

1562

#

1563

# This field is set by the Cloud Dataflow service when the Job is

1564

# created, and is immutable for the life of the job.

1565

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1566

# of the job it replaced.

1567

#

1568

# When sending a `CreateJobRequest`, you can update a job by specifying it

1569

# here. The job named here is stopped, and its intermediate state is

1570

# transferred to this job.

1571

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1572

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1573

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1574

"a_key": { # Contains information about how a particular

1575

# google.dataflow.v1beta3.Step will be executed.

1576

"stepName": [ # The steps associated with the execution stage.

1577

# Note that stages may have several steps, and that a given step

1578

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1579

"A String",

1580

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1581

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1582

},

1583

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1584

}</pre>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

</div>

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1588

<code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1589

<pre>Request the job status.

1590

1591

Args:

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1592

projectId: string, A project id. (required)

1593

jobId: string, The job to get metrics for. (required)

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1594

startTime: string, Return only metric data that has changed since this time.

1595

Default is to return all information about all metrics for the job.

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1596

location: string, The location which contains the job specified by job_id.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1597

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1598

Allowed values

1599

1 - v1 error format

1600

2 - v2 error format

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1601

1602

Returns:

1603

An object of the form:

1604

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1605

{ # JobMetrics contains a collection of metrics describing the detailed progress

1606

# of a Dataflow job. Metrics correspond to user-defined and system-defined

1607

# metrics in the job.

1608

#

1609

# This resource captures only the most recent values of each metric;

1610

# time-series data can be queried for them (under the same metric names)

1611

# from Cloud Monitoring.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1612

"metrics": [ # All metrics for this job.

1613

{ # Describes the state of a metric.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1614

"meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.

1615

# This holds the count of the aggregated values and is used in combination

1616

# with mean_sum to obtain the actual mean aggregate value.

1617

# The only possible value type is Long.

1618

"kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are

1619

# "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".

1620

# The specified aggregation kind is case-insensitive.

1621

#

1622

# If omitted, this is not an aggregated value but instead

1623

# a single metric sample value.

1624

"set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only

1625

# possible value type is a list of Values whose type can be Long, Double,

1626

# or String, according to the metric's type. All Values in the list must

1627

# be of the same type.

1628

"name": { # Identifies a metric, by describing the source which generated the # Name of the metric.

1629

# metric.

1630

"origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;

1631

# will be "dataflow" for metrics defined by the Dataflow service or SDK.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1632

"name": "A String", # Worker-defined metric name.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1633

"context": { # Zero or more labeled fields which identify the part of the job this

1634

# metric is associated with, such as the name of a step or collection.

1635

#

1636

# For example, built-in counters associated with steps will have

1637

# context['step'] = <step-name>. Counters associated with PCollections

1638

# in the SDK will have context['pcollection'] = <pcollection-name>.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1639

"a_key": "A String",

1640

},

1641

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1642

"meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.

1643

# This holds the sum of the aggregated values and is used in combination

1644

# with mean_count to obtain the actual mean aggregate value.

1645

# The only possible value types are Long and Double.

1646

"cumulative": True or False, # True if this metric is reported as the total cumulative aggregate

1647

# value accumulated since the worker started working on this WorkItem.

1648

# By default this is false, indicating that this metric is reported

1649

# as a delta that is not associated with any WorkItem.

1650

"updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are

1651

# reporting work progress; it will be filled in responses from the

1652

# metrics API.

1653

"scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",

1654

# "And", and "Or". The possible value types are Long, Double, and Boolean.

1655

"internal": "", # Worker-computed aggregate value for internal use by the Dataflow

1656

# service.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1657

},

1658

],

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1659

"metricTime": "A String", # Timestamp as of which metric values are current.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

}</pre>

</div>

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1664

<code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1665

<pre>List the jobs of a project.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1666

1667

Args:

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1668

projectId: string, The project which owns the jobs. (required)

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1669

pageSize: integer, If there are many jobs, limit response to at most this many.

1670

The actual number of jobs returned will be the lesser of max_responses

1671

and an unspecified server-defined limit.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1672

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

pageToken: string, Set this to the 'next_page_token' field of a previous response

1677

to request additional results in a long list.

1678

location: string, The location that contains this job.

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1679

filter: string, The kind of filter to use.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1680

view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1681

1682

Returns:

1683

An object of the form:

1684

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1685

{ # Response to a request to list Cloud Dataflow jobs. This may be a partial

1686

# response, depending on the page size in the ListJobsRequest.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1687

"nextPageToken": "A String", # Set if there may be more results than fit in this response.

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1688

"failedLocation": [ # Zero or more messages describing locations that failed to respond.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1689

{ # Indicates which location failed to respond to a request for data.

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

1690

"name": "A String", # The name of the failed location.

1691

},

1692

],

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1693

"jobs": [ # A subset of the requested job information.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1694

{ # Defines a job to be run by the Cloud Dataflow service.

1695

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

1696

# If this field is set, the service will ensure its uniqueness.

1697

# The request to create a job will fail if the service has knowledge of a

1698

# previously submitted job with the same client's ID and job name.

1699

# The caller may use this field to ensure idempotence of job

1700

# creation across retried attempts to create a job.

1701

# By default, the field is empty and, in that case, the service ignores it.

1702

"requestedState": "A String", # The job's requested state.

1703

#

1704

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1705

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1706

# also be used to directly set a job's requested state to

1707

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1708

# job if it has not already reached a terminal state.

1709

"name": "A String", # The user-specified Cloud Dataflow job name.

1710

#

1711

# Only one Job with a given name may exist in a project at any

1712

# given time. If a caller attempts to create a Job with the same

1713

# name as an already-existing Job, the attempt returns the

1714

# existing Job.

1715

#

1716

# The name must match the regular expression

1717

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

1718

"currentStateTime": "A String", # The timestamp associated with the current state.

1719

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1720

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1721

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1722

"labels": { # User-defined labels for this job.

1723

#

1724

# The labels map can contain no more than 64 entries. Entries of the labels

1725

# map are UTF8 strings that comply with the following restrictions:

1726

#

1727

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

1728

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

1729

# * Both keys and values are additionally constrained to be <= 128 bytes in

1730

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1731

"a_key": "A String",

1732

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1733

"location": "A String", # The location that contains this job.

1734

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1735

# Cloud Dataflow service.

1736

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

1737

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1738

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1739

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1740

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

1741

"version": { # A structure describing which components and their versions of the service

1742

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1743

"a_key": "", # Properties of the object.

1744

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1745

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1746

# storage. The system will append the suffix "/temp-{JOBNAME}" to

1747

# this resource prefix, where {JOBNAME} is the value of the

1748

# job_name field. The resulting bucket and object prefix is used

1749

# as the prefix of the resources used to store temporary data

1750

# needed during the job execution. NOTE: This will override the

1751

# value in taskrunner_settings.

1752

# The supported resource type is:

1753

#

1754

# Google Cloud Storage:

1755

#

1756

# storage.googleapis.com/{bucket}/{object}

1757

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1758

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1759

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1760

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1761

"dataset": "A String", # The dataset for the current project where various workflow

1762

# related tables are stored.

1763

#

1764

# The supported resource type is:

1765

#

1766

# Google BigQuery:

1767

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1768

"experiments": [ # The list of experiments to enable.

1769

"A String",

1770

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

1771

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1772

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

1773

# options are passed through the service and are used to recreate the

1774

# SDK pipeline options on the worker in a language agnostic and platform

1775

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1776

"a_key": "", # Properties of the object.

1777

},

1778

"userAgent": { # A description of the process that generated the request.

1779

"a_key": "", # Properties of the object.

1780

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1781

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

1782

# unspecified, the service will attempt to choose a reasonable

1783

# default. This should be in the form of the API service name,

1784

# e.g. "compute.googleapis.com".

1785

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

1786

# specified in order for the job to have workers.

1787

{ # Describes one particular pool of Cloud Dataflow workers to be

1788

# instantiated by the Cloud Dataflow service in order to perform the

1789

# computations required by a job. Note that a workflow job may use

1790

# multiple pools, in order to match the various computational

1791

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1792

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1793

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1794

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1795

# are supported.

1796

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1797

# service will attempt to choose a reasonable default.

1798

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

1799

# the service will use the network "default".

1800

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

1801

# will attempt to choose a reasonable default.

1802

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

1803

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1804

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1805

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

1806

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1807

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

1808

# Compute Engine API.

1809

"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.

1810

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

1811

# `TEARDOWN_NEVER`.

1812

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

1813

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

1814

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

1815

# down.

1816

#

1817

# If the workers are not torn down by the service, they will

1818

# continue to run and use Google Compute Engine VM resources in the

1819

# user's project until they are explicitly terminated by the user.

1820

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

1821

# policy except for small, manually supervised test jobs.

1822

#

1823

# If unknown or unspecified, the service will attempt to choose a reasonable

1824

# default.

1825

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

1826

# service will choose a number of threads (according to the number of cores

1827

# on the selected machine type for batch, or 1 by convention for streaming).

1828

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1829

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1830

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1831

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1832

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1833

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

1834

# execute the job. If zero or unspecified, the service will

1835

# attempt to choose a reasonable default.

1836

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1837

# using the standard Dataflow task runner. Users should ignore

1838

# this field.

1839

"workflowFileName": "A String", # The file to store the workflow in.

1840

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1841

# will not be uploaded.

1842

#

1843

# The supported resource type is:

1844

#

1845

# Google Cloud Storage:

1846

# storage.googleapis.com/{bucket}/{object}

1847

# bucket.storage.googleapis.com/{object}

1848

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

1849

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1850

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1851

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1852

"vmId": "A String", # The ID string of the VM.

1853

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1854

# taskrunner; e.g. "wheel".

1855

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1856

# taskrunner; e.g. "root".

1857

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1858

# access the Cloud Dataflow API.

1859

"A String",

1860

],

1861

"languageHint": "A String", # The suggested backend language.

1862

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1863

# console.

1864

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1865

"logDir": "A String", # The directory on the VM to store logs.

1866

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1867

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1868

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1869

# "shuffle/v1beta1".

1870

"workerId": "A String", # The ID of the worker running this pipeline.

1871

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1872

#

1873

# When workers access Google Cloud APIs, they logically do so via

1874

# relative URLs. If this field is specified, it supplies the base

1875

# URL to use for resolving these relative URLs. The normative

1876

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1877

# Locators".

1878

#

1879

# If not specified, the default value is "http://www.googleapis.com/"

1880

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1881

# "dataflow/v1b3/projects".

1882

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1883

# storage.

1884

#

1885

# The supported resource type is:

1886

#

1887

# Google Cloud Storage:

1888

#

1889

# storage.googleapis.com/{bucket}/{object}

1890

# bucket.storage.googleapis.com/{object}

1891

},

1892

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

1893

"harnessCommand": "A String", # The command to launch the worker harness.

1894

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1895

# temporary storage.

1896

#

1897

# The supported resource type is:

1898

#

1899

# Google Cloud Storage:

1900

# storage.googleapis.com/{bucket}/{object}

1901

# bucket.storage.googleapis.com/{object}

1902

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1903

#

1904

# When workers access Google Cloud APIs, they logically do so via

1905

# relative URLs. If this field is specified, it supplies the base

1906

# URL to use for resolving these relative URLs. The normative

1907

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1908

# Locators".

1909

#

1910

# If not specified, the default value is "http://www.googleapis.com/"

1911

},

1912

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1913

# select a default set of packages which are useful to worker

1914

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1915

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1916

{ # The packages that must be installed in order for a worker to run the

1917

# steps of the Cloud Dataflow job that will be assigned to its worker

1918

# pool.

1919

#

1920

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1921

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1922

# might use this to install jars containing the user's code and all of the

1923

# various dependencies (libraries, data files, etc.) required in order

1924

# for that code to run.

1925

"location": "A String", # The resource to read the package from. The supported resource type is:

1926

#

1927

# Google Cloud Storage:

1928

#

1929

# storage.googleapis.com/{bucket}

1930

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1931

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1932

},

1933

],

1934

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1935

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1936

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1937

},

1938

"dataDisks": [ # Data disks that are used by a VM in this workflow.

1939

{ # Describes the data disk used by a workflow job.

1940

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1941

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

1942

# attempt to choose a reasonable default.

1943

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

1944

# must be a disk type appropriate to the project and zone in which

1945

# the workers will run. If unknown or unspecified, the service

1946

# will attempt to choose a reasonable default.

1947

#

1948

# For example, the standard persistent disk type is a resource name

1949

# typically ending in "pd-standard". If SSD persistent disks are

1950

# available, the resource name typically ends with "pd-ssd". The

1951

# actual valid values are defined by the Google Compute Engine API,

1952

# not by the Cloud Dataflow API; consult the Google Compute Engine

1953

# documentation for more information about determining the set of

1954

# available disk types for a particular project and zone.

1955

#

1956

# Google Compute Engine Disk types are local to a particular

1957

# project in a particular zone, and so the resource name will

1958

# typically look something like this:

1959

#

1960

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1961

},

1962

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1963

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

1964

# attempt to choose a reasonable default.

1965

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1966

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

1967

},

1968

],

1969

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

1970

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1971

# A description of the user pipeline and stages through which it is executed.

1972

# Created by Cloud Dataflow service. Only retrieved with

1973

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1974

# form. This data is provided by the Dataflow service for ease of visualizing

1975

# the pipeline and interpreting Dataflow provided metrics.

1976

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1977

{ # Description of the type, names/ids, and input/outputs for a transform.

1978

"kind": "A String", # Type of transform.

1979

"name": "A String", # User provided name for this transform instance.

1980

"inputCollectionName": [ # User names for all collection inputs to this transform.

1981

"A String",

1982

],

1983

"displayData": [ # Transform-specific display data.

1984

{ # Data provided with a pipeline or transform to provide descriptive info.

1985

"key": "A String", # The key identifying the display data.

1986

# This is intended to be used as a label for the display data

1987

# when viewed in a dax monitoring system.

1988

"shortStrValue": "A String", # A possible additional shorter value to display.

1989

# For example a java_class_name_value of com.mypackage.MyDoFn

1990

# will be stored with MyDoFn as the short_str_value and

1991

# com.mypackage.MyDoFn as the java_class_name value.

1992

# short_str_value can be displayed and java_class_name_value

1993

# will be displayed as a tooltip.

1994

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1995

"url": "A String", # An optional full URL.

1996

"floatValue": 3.14, # Contains value if the data is of float type.

1997

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1998

# language namespace (i.e. python module) which defines the display data.

1999

# This allows a dax monitoring system to specially handle the data

2000

# and perform custom rendering.

2001

"javaClassValue": "A String", # Contains value if the data is of java class type.

2002

"label": "A String", # An optional label to display in a dax UI for the element.

2003

"boolValue": True or False, # Contains value if the data is of a boolean type.

2004

"strValue": "A String", # Contains value if the data is of string type.

2005

"durationValue": "A String", # Contains value if the data is of duration type.

2006

"int64Value": "A String", # Contains value if the data is of int64 type.

2007

},

2008

],

2009

"outputCollectionName": [ # User names for all collection outputs to this transform.

2010

"A String",

2011

],

2012

"id": "A String", # SDK generated id of this transform instance.

2013

},

2014

],

2015

"displayData": [ # Pipeline level display data.

2016

{ # Data provided with a pipeline or transform to provide descriptive info.

2017

"key": "A String", # The key identifying the display data.

2018

# This is intended to be used as a label for the display data

2019

# when viewed in a dax monitoring system.

2020

"shortStrValue": "A String", # A possible additional shorter value to display.

2021

# For example a java_class_name_value of com.mypackage.MyDoFn

2022

# will be stored with MyDoFn as the short_str_value and

2023

# com.mypackage.MyDoFn as the java_class_name value.

2024

# short_str_value can be displayed and java_class_name_value

2025

# will be displayed as a tooltip.

2026

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2027

"url": "A String", # An optional full URL.

2028

"floatValue": 3.14, # Contains value if the data is of float type.

2029

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2030

# language namespace (i.e. python module) which defines the display data.

2031

# This allows a dax monitoring system to specially handle the data

2032

# and perform custom rendering.

2033

"javaClassValue": "A String", # Contains value if the data is of java class type.

2034

"label": "A String", # An optional label to display in a dax UI for the element.

2035

"boolValue": True or False, # Contains value if the data is of a boolean type.

2036

"strValue": "A String", # Contains value if the data is of string type.

2037

"durationValue": "A String", # Contains value if the data is of duration type.

2038

"int64Value": "A String", # Contains value if the data is of int64 type.

2039

},

2040

],

2041

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

2042

{ # Description of the composing transforms, names/ids, and input/outputs of a

2043

# stage of execution. Some composing transforms and sources may have been

2044

# generated by the Dataflow service during execution planning.

2045

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

2046

{ # Description of an interstitial value between transforms in an execution

2047

# stage.

2048

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2049

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2050

# source is most closely associated.

2051

"name": "A String", # Dataflow service generated name for this source.

2052

},

2053

],

2054

"kind": "A String", # Type of tranform this stage is executing.

2055

"name": "A String", # Dataflow service generated name for this stage.

2056

"outputSource": [ # Output sources for this stage.

2057

{ # Description of an input or output of an execution stage.

2058

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2059

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2060

# source is most closely associated.

2061

"name": "A String", # Dataflow service generated name for this source.

2062

"sizeBytes": "A String", # Size of the source, if measurable.

2063

},

2064

],

2065

"inputSource": [ # Input sources for this stage.

2066

{ # Description of an input or output of an execution stage.

2067

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2068

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2069

# source is most closely associated.

2070

"name": "A String", # Dataflow service generated name for this source.

2071

"sizeBytes": "A String", # Size of the source, if measurable.

2072

},

2073

],

2074

"componentTransform": [ # Transforms that comprise this execution stage.

2075

{ # Description of a transform executed as part of an execution stage.

2076

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2077

"originalTransform": "A String", # User name for the original user transform with which this transform is

2078

# most closely associated.

2079

"name": "A String", # Dataflow service generated name for this transform.

2080

},

2081

],

2082

"id": "A String", # Dataflow service generated id for this stage.

2083

},

2084

],

2085

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2086

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2087

{ # Defines a particular step within a Cloud Dataflow job.

2088

#

2089

# A job consists of multiple steps, each of which performs some

2090

# specific operation as part of the overall job. Data is typically

2091

# passed from one step to another as part of the job.

2092

#

2093

# Here's an example of a sequence of steps which together implement a

2094

# Map-Reduce job:

2095

#

2096

# * Read a collection of data from some source, parsing the

2097

# collection's elements.

2098

#

2099

# * Validate the elements.

2100

#

2101

# * Apply a user-defined function to map each element to some value

2102

# and extract an element-specific key value.

2103

#

2104

# * Group elements with the same key into a single element with

2105

# that key, transforming a multiply-keyed collection into a

2106

# uniquely-keyed collection.

2107

#

2108

# * Write the elements out to some data sink.

2109

#

2110

# Note that the Cloud Dataflow service may be used to run many different

2111

# types of jobs, not just Map-Reduce.

2112

"kind": "A String", # The kind of step in the Cloud Dataflow job.

2113

"properties": { # Named properties associated with the step. Each kind of

2114

# predefined step has its own required set of properties.

2115

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2116

"a_key": "", # Properties of the object.

2117

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2118

"name": "A String", # The name that identifies the step. This must be unique for each

2119

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2120

},

2121

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2122

"currentState": "A String", # The current state of the job.

2123

#

2124

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

2125

# specified.

2126

#

2127

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

2128

# terminal state. After a job has reached a terminal state, no

2129

# further state updates may be made.

2130

#

2131

# This field may be mutated by the Cloud Dataflow service;

2132

# callers cannot mutate it.

2133

"tempFiles": [ # A set of files the system should be aware of that are used

2134

# for temporary storage. These temporary files will be

2135

# removed on job completion.

2136

# No duplicates are allowed.

2137

# No file patterns are supported.

2138

#

2139

# The supported files are:

2140

#

2141

# Google Cloud Storage:

2142

#

2143

# storage.googleapis.com/{bucket}/{object}

2144

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

2145

"A String",

2146

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2147

"type": "A String", # The type of Cloud Dataflow job.

2148

"id": "A String", # The unique ID of this job.

2149

#

2150

# This field is set by the Cloud Dataflow service when the Job is

2151

# created, and is immutable for the life of the job.

2152

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

2153

# of the job it replaced.

2154

#

2155

# When sending a `CreateJobRequest`, you can update a job by specifying it

2156

# here. The job named here is stopped, and its intermediate state is

2157

# transferred to this job.

2158

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

2159

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2160

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2161

"a_key": { # Contains information about how a particular

2162

# google.dataflow.v1beta3.Step will be executed.

2163

"stepName": [ # The steps associated with the execution stage.

2164

# Note that stages may have several steps, and that a given step

2165

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2166

"A String",

2167

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2168

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2169

},

2170

},

2171

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

],

}</pre>

</div>

<code class="details" id="list_next">list_next(previous_request, previous_response)</code>

2178

<pre>Retrieves the next page of results.

2179

2180

Args:

2181

previous_request: The request for the previous page. (required)

2182

previous_response: The response from the request for the previous page. (required)

2183

2184

Returns:

2185

A request object that you can call 'execute()' on to request the next

2186

page. Returns None if there are no more items in the collection.

</pre>

</div>

Jon Wayne Parrott

2017-01-06 09:58:29 -0800

[diff] [blame]

2191

<code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2192

<pre>Updates the state of an existing Cloud Dataflow job.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2193

2194

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2195

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

2196

jobId: string, The job ID. (required)

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2197

body: object, The request body. (required)

2198

The object takes the form of:

2199

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2200

{ # Defines a job to be run by the Cloud Dataflow service.

2201

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

2202

# If this field is set, the service will ensure its uniqueness.

2203

# The request to create a job will fail if the service has knowledge of a

2204

# previously submitted job with the same client's ID and job name.

2205

# The caller may use this field to ensure idempotence of job

2206

# creation across retried attempts to create a job.

2207

# By default, the field is empty and, in that case, the service ignores it.

2208

"requestedState": "A String", # The job's requested state.

2209

#

2210

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

2211

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

2212

# also be used to directly set a job's requested state to

2213

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

2214

# job if it has not already reached a terminal state.

2215

"name": "A String", # The user-specified Cloud Dataflow job name.

2216

#

2217

# Only one Job with a given name may exist in a project at any

2218

# given time. If a caller attempts to create a Job with the same

2219

# name as an already-existing Job, the attempt returns the

2220

# existing Job.

2221

#

2222

# The name must match the regular expression

2223

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

2224

"currentStateTime": "A String", # The timestamp associated with the current state.

2225

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

2226

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

2227

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

2228

"labels": { # User-defined labels for this job.

2229

#

2230

# The labels map can contain no more than 64 entries. Entries of the labels

2231

# map are UTF8 strings that comply with the following restrictions:

2232

#

2233

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

2234

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

2235

# * Both keys and values are additionally constrained to be <= 128 bytes in

2236

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2237

"a_key": "A String",

2238

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2239

"location": "A String", # The location that contains this job.

2240

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

2241

# Cloud Dataflow service.

2242

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

2243

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2244

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2245

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2246

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

2247

"version": { # A structure describing which components and their versions of the service

2248

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2249

"a_key": "", # Properties of the object.

2250

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2251

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2252

# storage. The system will append the suffix "/temp-{JOBNAME}" to

2253

# this resource prefix, where {JOBNAME} is the value of the

2254

# job_name field. The resulting bucket and object prefix is used

2255

# as the prefix of the resources used to store temporary data

2256

# needed during the job execution. NOTE: This will override the

2257

# value in taskrunner_settings.

2258

# The supported resource type is:

2259

#

2260

# Google Cloud Storage:

2261

#

2262

# storage.googleapis.com/{bucket}/{object}

2263

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2264

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2265

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2266

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2267

"dataset": "A String", # The dataset for the current project where various workflow

2268

# related tables are stored.

2269

#

2270

# The supported resource type is:

2271

#

2272

# Google BigQuery:

2273

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2274

"experiments": [ # The list of experiments to enable.

2275

"A String",

2276

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

2277

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2278

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

2279

# options are passed through the service and are used to recreate the

2280

# SDK pipeline options on the worker in a language agnostic and platform

2281

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2282

"a_key": "", # Properties of the object.

2283

},

2284

"userAgent": { # A description of the process that generated the request.

2285

"a_key": "", # Properties of the object.

2286

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2287

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

2288

# unspecified, the service will attempt to choose a reasonable

2289

# default. This should be in the form of the API service name,

2290

# e.g. "compute.googleapis.com".

2291

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

2292

# specified in order for the job to have workers.

2293

{ # Describes one particular pool of Cloud Dataflow workers to be

2294

# instantiated by the Cloud Dataflow service in order to perform the

2295

# computations required by a job. Note that a workflow job may use

2296

# multiple pools, in order to match the various computational

2297

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2298

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2299

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2300

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

2301

# are supported.

2302

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

2303

# service will attempt to choose a reasonable default.

2304

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

2305

# the service will use the network "default".

2306

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

2307

# will attempt to choose a reasonable default.

2308

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

2309

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2310

"metadata": { # Metadata to set on the Google Compute Engine VMs.

2311

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2312

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2313

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

2314

# Compute Engine API.

2315

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

2316

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

2317

# `TEARDOWN_NEVER`.

2318

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

2319

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

2320

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

2321

# down.

2322

#

2323

# If the workers are not torn down by the service, they will

2324

# continue to run and use Google Compute Engine VM resources in the

2325

# user's project until they are explicitly terminated by the user.

2326

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

2327

# policy except for small, manually supervised test jobs.

2328

#

2329

# If unknown or unspecified, the service will attempt to choose a reasonable

2330

# default.

2331

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

2332

# service will choose a number of threads (according to the number of cores

2333

# on the selected machine type for batch, or 1 by convention for streaming).

2334

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

2335

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2336

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2337

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2338

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2339

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

2340

# execute the job. If zero or unspecified, the service will

2341

# attempt to choose a reasonable default.

2342

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

2343

# using the standard Dataflow task runner. Users should ignore

2344

# this field.

2345

"workflowFileName": "A String", # The file to store the workflow in.

2346

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

2347

# will not be uploaded.

2348

#

2349

# The supported resource type is:

2350

#

2351

# Google Cloud Storage:

2352

# storage.googleapis.com/{bucket}/{object}

2353

# bucket.storage.googleapis.com/{object}

2354

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

2355

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

2356

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

2357

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

2358

"vmId": "A String", # The ID string of the VM.

2359

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

2360

# taskrunner; e.g. "wheel".

2361

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

2362

# taskrunner; e.g. "root".

2363

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

2364

# access the Cloud Dataflow API.

2365

"A String",

2366

],

2367

"languageHint": "A String", # The suggested backend language.

2368

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

2369

# console.

2370

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

2371

"logDir": "A String", # The directory on the VM to store logs.

2372

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

2373

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

2374

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

2375

# "shuffle/v1beta1".

2376

"workerId": "A String", # The ID of the worker running this pipeline.

2377

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

2378

#

2379

# When workers access Google Cloud APIs, they logically do so via

2380

# relative URLs. If this field is specified, it supplies the base

2381

# URL to use for resolving these relative URLs. The normative

2382

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2383

# Locators".

2384

#

2385

# If not specified, the default value is "http://www.googleapis.com/"

2386

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

2387

# "dataflow/v1b3/projects".

2388

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2389

# storage.

2390

#

2391

# The supported resource type is:

2392

#

2393

# Google Cloud Storage:

2394

#

2395

# storage.googleapis.com/{bucket}/{object}

2396

# bucket.storage.googleapis.com/{object}

2397

},

2398

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

2399

"harnessCommand": "A String", # The command to launch the worker harness.

2400

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

2401

# temporary storage.

2402

#

2403

# The supported resource type is:

2404

#

2405

# Google Cloud Storage:

2406

# storage.googleapis.com/{bucket}/{object}

2407

# bucket.storage.googleapis.com/{object}

2408

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

2409

#

2410

# When workers access Google Cloud APIs, they logically do so via

2411

# relative URLs. If this field is specified, it supplies the base

2412

# URL to use for resolving these relative URLs. The normative

2413

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2414

# Locators".

2415

#

2416

# If not specified, the default value is "http://www.googleapis.com/"

2417

},

2418

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

2419

# select a default set of packages which are useful to worker

2420

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2421

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2422

{ # The packages that must be installed in order for a worker to run the

2423

# steps of the Cloud Dataflow job that will be assigned to its worker

2424

# pool.

2425

#

2426

# This is the mechanism by which the Cloud Dataflow SDK causes code to

2427

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

2428

# might use this to install jars containing the user's code and all of the

2429

# various dependencies (libraries, data files, etc.) required in order

2430

# for that code to run.

2431

"location": "A String", # The resource to read the package from. The supported resource type is:

2432

#

2433

# Google Cloud Storage:

2434

#

2435

# storage.googleapis.com/{bucket}

2436

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2437

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2438

},

2439

],

2440

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2441

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2442

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2443

},

2444

"dataDisks": [ # Data disks that are used by a VM in this workflow.

2445

{ # Describes the data disk used by a workflow job.

2446

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2447

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

2448

# attempt to choose a reasonable default.

2449

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

2450

# must be a disk type appropriate to the project and zone in which

2451

# the workers will run. If unknown or unspecified, the service

2452

# will attempt to choose a reasonable default.

2453

#

2454

# For example, the standard persistent disk type is a resource name

2455

# typically ending in "pd-standard". If SSD persistent disks are

2456

# available, the resource name typically ends with "pd-ssd". The

2457

# actual valid values are defined by the Google Compute Engine API,

2458

# not by the Cloud Dataflow API; consult the Google Compute Engine

2459

# documentation for more information about determining the set of

2460

# available disk types for a particular project and zone.

2461

#

2462

# Google Compute Engine Disk types are local to a particular

2463

# project in a particular zone, and so the resource name will

2464

# typically look something like this:

2465

#

2466

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2467

},

2468

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2469

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

2470

# attempt to choose a reasonable default.

2471

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

2472

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2473

},

2474

],

2475

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2476

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

2477

# A description of the user pipeline and stages through which it is executed.

2478

# Created by Cloud Dataflow service. Only retrieved with

2479

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

2480

# form. This data is provided by the Dataflow service for ease of visualizing

2481

# the pipeline and interpreting Dataflow provided metrics.

2482

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

2483

{ # Description of the type, names/ids, and input/outputs for a transform.

2484

"kind": "A String", # Type of transform.

2485

"name": "A String", # User provided name for this transform instance.

2486

"inputCollectionName": [ # User names for all collection inputs to this transform.

2487

"A String",

2488

],

2489

"displayData": [ # Transform-specific display data.

2490

{ # Data provided with a pipeline or transform to provide descriptive info.

2491

"key": "A String", # The key identifying the display data.

2492

# This is intended to be used as a label for the display data

2493

# when viewed in a dax monitoring system.

2494

"shortStrValue": "A String", # A possible additional shorter value to display.

2495

# For example a java_class_name_value of com.mypackage.MyDoFn

2496

# will be stored with MyDoFn as the short_str_value and

2497

# com.mypackage.MyDoFn as the java_class_name value.

2498

# short_str_value can be displayed and java_class_name_value

2499

# will be displayed as a tooltip.

2500

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2501

"url": "A String", # An optional full URL.

2502

"floatValue": 3.14, # Contains value if the data is of float type.

2503

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2504

# language namespace (i.e. python module) which defines the display data.

2505

# This allows a dax monitoring system to specially handle the data

2506

# and perform custom rendering.

2507

"javaClassValue": "A String", # Contains value if the data is of java class type.

2508

"label": "A String", # An optional label to display in a dax UI for the element.

2509

"boolValue": True or False, # Contains value if the data is of a boolean type.

2510

"strValue": "A String", # Contains value if the data is of string type.

2511

"durationValue": "A String", # Contains value if the data is of duration type.

2512

"int64Value": "A String", # Contains value if the data is of int64 type.

2513

},

2514

],

2515

"outputCollectionName": [ # User names for all collection outputs to this transform.

2516

"A String",

2517

],

2518

"id": "A String", # SDK generated id of this transform instance.

2519

},

2520

],

2521

"displayData": [ # Pipeline level display data.

2522

{ # Data provided with a pipeline or transform to provide descriptive info.

2523

"key": "A String", # The key identifying the display data.

2524

# This is intended to be used as a label for the display data

2525

# when viewed in a dax monitoring system.

2526

"shortStrValue": "A String", # A possible additional shorter value to display.

2527

# For example a java_class_name_value of com.mypackage.MyDoFn

2528

# will be stored with MyDoFn as the short_str_value and

2529

# com.mypackage.MyDoFn as the java_class_name value.

2530

# short_str_value can be displayed and java_class_name_value

2531

# will be displayed as a tooltip.

2532

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2533

"url": "A String", # An optional full URL.

2534

"floatValue": 3.14, # Contains value if the data is of float type.

2535

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2536

# language namespace (i.e. python module) which defines the display data.

2537

# This allows a dax monitoring system to specially handle the data

2538

# and perform custom rendering.

2539

"javaClassValue": "A String", # Contains value if the data is of java class type.

2540

"label": "A String", # An optional label to display in a dax UI for the element.

2541

"boolValue": True or False, # Contains value if the data is of a boolean type.

2542

"strValue": "A String", # Contains value if the data is of string type.

2543

"durationValue": "A String", # Contains value if the data is of duration type.

2544

"int64Value": "A String", # Contains value if the data is of int64 type.

2545

},

2546

],

2547

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

2548

{ # Description of the composing transforms, names/ids, and input/outputs of a

2549

# stage of execution. Some composing transforms and sources may have been

2550

# generated by the Dataflow service during execution planning.

2551

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

2552

{ # Description of an interstitial value between transforms in an execution

2553

# stage.

2554

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2555

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2556

# source is most closely associated.

2557

"name": "A String", # Dataflow service generated name for this source.

2558

},

2559

],

2560

"kind": "A String", # Type of transform this stage is executing.

2561

"name": "A String", # Dataflow service generated name for this stage.

2562

"outputSource": [ # Output sources for this stage.

2563

{ # Description of an input or output of an execution stage.

2564

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2565

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2566

# source is most closely associated.

2567

"name": "A String", # Dataflow service generated name for this source.

2568

"sizeBytes": "A String", # Size of the source, if measurable.

2569

},

2570

],

2571

"inputSource": [ # Input sources for this stage.

2572

{ # Description of an input or output of an execution stage.

2573

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2574

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2575

# source is most closely associated.

2576

"name": "A String", # Dataflow service generated name for this source.

2577

"sizeBytes": "A String", # Size of the source, if measurable.

2578

},

2579

],

2580

"componentTransform": [ # Transforms that comprise this execution stage.

2581

{ # Description of a transform executed as part of an execution stage.

2582

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2583

"originalTransform": "A String", # User name for the original user transform with which this transform is

2584

# most closely associated.

2585

"name": "A String", # Dataflow service generated name for this source.

2586

},

2587

],

2588

"id": "A String", # Dataflow service generated id for this stage.

2589

},

2590

],

2591

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2592

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2593

{ # Defines a particular step within a Cloud Dataflow job.

2594

#

2595

# A job consists of multiple steps, each of which performs some

2596

# specific operation as part of the overall job. Data is typically

2597

# passed from one step to another as part of the job.

2598

#

2599

# Here's an example of a sequence of steps which together implement a

2600

# Map-Reduce job:

2601

#

2602

# * Read a collection of data from some source, parsing the

2603

# collection's elements.

2604

#

2605

# * Validate the elements.

2606

#

2607

# * Apply a user-defined function to map each element to some value

2608

# and extract an element-specific key value.

2609

#

2610

# * Group elements with the same key into a single element with

2611

# that key, transforming a multiply-keyed collection into a

2612

# uniquely-keyed collection.

2613

#

2614

# * Write the elements out to some data sink.

2615

#

2616

# Note that the Cloud Dataflow service may be used to run many different

2617

# types of jobs, not just Map-Reduce.

2618

"kind": "A String", # The kind of step in the Cloud Dataflow job.

2619

"properties": { # Named properties associated with the step. Each kind of

2620

# predefined step has its own required set of properties.

2621

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2622

"a_key": "", # Properties of the object.

2623

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2624

"name": "A String", # The name that identifies the step. This must be unique for each

2625

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2626

},

2627

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2628

"currentState": "A String", # The current state of the job.

2629

#

2630

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

2631

# specified.

2632

#

2633

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

2634

# terminal state. After a job has reached a terminal state, no

2635

# further state updates may be made.

2636

#

2637

# This field may be mutated by the Cloud Dataflow service;

2638

# callers cannot mutate it.

2639

"tempFiles": [ # A set of files the system should be aware of that are used

2640

# for temporary storage. These temporary files will be

2641

# removed on job completion.

2642

# No duplicates are allowed.

2643

# No file patterns are supported.

2644

#

2645

# The supported files are:

2646

#

2647

# Google Cloud Storage:

2648

#

2649

# storage.googleapis.com/{bucket}/{object}

2650

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

2651

"A String",

2652

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2653

"type": "A String", # The type of Cloud Dataflow job.

2654

"id": "A String", # The unique ID of this job.

2655

#

2656

# This field is set by the Cloud Dataflow service when the Job is

2657

# created, and is immutable for the life of the job.

2658

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

2659

# of the job it replaced.

2660

#

2661

# When sending a `CreateJobRequest`, you can update a job by specifying it

2662

# here. The job named here is stopped, and its intermediate state is

2663

# transferred to this job.

2664

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

2665

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2666

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2667

"a_key": { # Contains information about how a particular

2668

# google.dataflow.v1beta3.Step will be executed.

2669

"stepName": [ # The steps associated with the execution stage.

2670

# Note that stages may have several steps, and that a given step

2671

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2672

"A String",

2673

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2674

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2675

},

2676

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2677

}

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2678

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2679

location: string, The location that contains this job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2680

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2681

Allowed values

2682

1 - v1 error format

2683

2 - v2 error format

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2684

2685

Returns:

2686

An object of the form:

2687

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2688

{ # Defines a job to be run by the Cloud Dataflow service.

2689

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

2690

# If this field is set, the service will ensure its uniqueness.

2691

# The request to create a job will fail if the service has knowledge of a

2692

# previously submitted job with the same client's ID and job name.

2693

# The caller may use this field to ensure idempotence of job

2694

# creation across retried attempts to create a job.

2695

# By default, the field is empty and, in that case, the service ignores it.

2696

"requestedState": "A String", # The job's requested state.

2697

#

2698

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

2699

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

2700

# also be used to directly set a job's requested state to

2701

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

2702

# job if it has not already reached a terminal state.

2703

"name": "A String", # The user-specified Cloud Dataflow job name.

2704

#

2705

# Only one Job with a given name may exist in a project at any

2706

# given time. If a caller attempts to create a Job with the same

2707

# name as an already-existing Job, the attempt returns the

2708

# existing Job.

2709

#

2710

# The name must match the regular expression

2711

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

2712

"currentStateTime": "A String", # The timestamp associated with the current state.

2713

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

2714

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

2715

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

2716

"labels": { # User-defined labels for this job.

2717

#

2718

# The labels map can contain no more than 64 entries. Entries of the labels

2719

# map are UTF8 strings that comply with the following restrictions:

2720

#

2721

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

2722

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

2723

# * Both keys and values are additionally constrained to be <= 128 bytes in

2724

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2725

"a_key": "A String",

2726

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2727

"location": "A String", # The location that contains this job.

2728

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

2729

# Cloud Dataflow service.

2730

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

2731

# corresponding name prefixes of the new job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2732

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2733

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2734

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

2735

"version": { # A structure describing which components and their versions of the service

2736

# are required in order to run the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2737

"a_key": "", # Properties of the object.

2738

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2739

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2740

# storage. The system will append the suffix "/temp-{JOBNAME}" to

2741

# this resource prefix, where {JOBNAME} is the value of the

2742

# job_name field. The resulting bucket and object prefix is used

2743

# as the prefix of the resources used to store temporary data

2744

# needed during the job execution. NOTE: This will override the

2745

# value in taskrunner_settings.

2746

# The supported resource type is:

2747

#

2748

# Google Cloud Storage:

2749

#

2750

# storage.googleapis.com/{bucket}/{object}

2751

# bucket.storage.googleapis.com/{object}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2752

"internalExperiments": { # Experimental settings.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2753

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2754

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2755

"dataset": "A String", # The dataset for the current project where various workflow

2756

# related tables are stored.

2757

#

2758

# The supported resource type is:

2759

#

2760

# Google BigQuery:

2761

# bigquery.googleapis.com/{dataset}

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2762

"experiments": [ # The list of experiments to enable.

2763

"A String",

2764

],

Sai Cheemalapati

2016-10-12 14:05:53 -0700

[diff] [blame]

2765

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2766

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

2767

# options are passed through the service and are used to recreate the

2768

# SDK pipeline options on the worker in a language agnostic and platform

2769

# independent way.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2770

"a_key": "", # Properties of the object.

2771

},

2772

"userAgent": { # A description of the process that generated the request.

2773

"a_key": "", # Properties of the object.

2774

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2775

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

2776

# unspecified, the service will attempt to choose a reasonable

2777

# default. This should be in the form of the API service name,

2778

# e.g. "compute.googleapis.com".

2779

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

2780

# specified in order for the job to have workers.

2781

{ # Describes one particular pool of Cloud Dataflow workers to be

2782

# instantiated by the Cloud Dataflow service in order to perform the

2783

# computations required by a job. Note that a workflow job may use

2784

# multiple pools, in order to match the various computational

2785

# requirements of the various stages of the job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2786

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2787

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2788

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

2789

# are supported.

2790

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

2791

# service will attempt to choose a reasonable default.

2792

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

2793

# the service will use the network "default".

2794

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

2795

# will attempt to choose a reasonable default.

2796

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

2797

# attempt to choose a reasonable default.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2798

"metadata": { # Metadata to set on the Google Compute Engine VMs.

2799

"a_key": "A String",

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

2800

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2801

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

2802

# Compute Engine API.

2803

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

2804

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

2805

# `TEARDOWN_NEVER`.

2806

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

2807

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

2808

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

2809

# down.

2810

#

2811

# If the workers are not torn down by the service, they will

2812

# continue to run and use Google Compute Engine VM resources in the

2813

# user's project until they are explicitly terminated by the user.

2814

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

2815

# policy except for small, manually supervised test jobs.

2816

#

2817

# If unknown or unspecified, the service will attempt to choose a reasonable

2818

# default.

2819

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

2820

# service will choose a number of threads (according to the number of cores

2821

# on the selected machine type for batch, or 1 by convention for streaming).

2822

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

2823

# the form "regions/REGION/subnetworks/SUBNETWORK".

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2824

"poolArgs": { # Extra arguments for this worker pool.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

2825

"a_key": "", # Properties of the object. Contains field @type with type URL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2826

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2827

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

2828

# execute the job. If zero or unspecified, the service will

2829

# attempt to choose a reasonable default.

2830

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

2831

# using the standard Dataflow task runner. Users should ignore

2832

# this field.

2833

"workflowFileName": "A String", # The file to store the workflow in.

2834

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

2835

# will not be uploaded.

2836

#

2837

# The supported resource type is:

2838

#

2839

# Google Cloud Storage:

2840

# storage.googleapis.com/{bucket}/{object}

2841

# bucket.storage.googleapis.com/{object}

2842

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

2843

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

2844

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

2845

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

2846

"vmId": "A String", # The ID string of the VM.

2847

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

2848

# taskrunner; e.g. "wheel".

2849

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

2850

# taskrunner; e.g. "root".

2851

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

2852

# access the Cloud Dataflow API.

2853

"A String",

2854

],

2855

"languageHint": "A String", # The suggested backend language.

2856

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

2857

# console.

2858

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

2859

"logDir": "A String", # The directory on the VM to store logs.

2860

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

2861

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

2862

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

2863

# "shuffle/v1beta1".

2864

"workerId": "A String", # The ID of the worker running this pipeline.

2865

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

2866

#

2867

# When workers access Google Cloud APIs, they logically do so via

2868

# relative URLs. If this field is specified, it supplies the base

2869

# URL to use for resolving these relative URLs. The normative

2870

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2871

# Locators".

2872

#

2873

# If not specified, the default value is "http://www.googleapis.com/"

2874

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

2875

# "dataflow/v1b3/projects".

2876

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2877

# storage.

2878

#

2879

# The supported resource type is:

2880

#

2881

# Google Cloud Storage:

2882

#

2883

# storage.googleapis.com/{bucket}/{object}

2884

# bucket.storage.googleapis.com/{object}

2885

},

2886

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

2887

"harnessCommand": "A String", # The command to launch the worker harness.

2888

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

2889

# temporary storage.

2890

#

2891

# The supported resource type is:

2892

#

2893

# Google Cloud Storage:

2894

# storage.googleapis.com/{bucket}/{object}

2895

# bucket.storage.googleapis.com/{object}

2896

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

2897

#

2898

# When workers access Google Cloud APIs, they logically do so via

2899

# relative URLs. If this field is specified, it supplies the base

2900

# URL to use for resolving these relative URLs. The normative

2901

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2902

# Locators".

2903

#

2904

# If not specified, the default value is "http://www.googleapis.com/"

2905

},

2906

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

2907

# select a default set of packages which are useful to worker

2908

# harnesses written in a particular language.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2909

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2910

{ # The packages that must be installed in order for a worker to run the

2911

# steps of the Cloud Dataflow job that will be assigned to its worker

2912

# pool.

2913

#

2914

# This is the mechanism by which the Cloud Dataflow SDK causes code to

2915

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

2916

# might use this to install jars containing the user's code and all of the

2917

# various dependencies (libraries, data files, etc.) required in order

2918

# for that code to run.

2919

"location": "A String", # The resource to read the package from. The supported resource type is:

2920

#

2921

# Google Cloud Storage:

2922

#

2923

# storage.googleapis.com/{bucket}

2924

# bucket.storage.googleapis.com/

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2925

"name": "A String", # The name of the package.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2926

},

2927

],

2928

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2929

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2930

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2931

},

2932

"dataDisks": [ # Data disks that are used by a VM in this workflow.

2933

{ # Describes the data disk used by a workflow job.

2934

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2935

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

2936

# attempt to choose a reasonable default.

2937

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

2938

# must be a disk type appropriate to the project and zone in which

2939

# the workers will run. If unknown or unspecified, the service

2940

# will attempt to choose a reasonable default.

2941

#

2942

# For example, the standard persistent disk type is a resource name

2943

# typically ending in "pd-standard". If SSD persistent disks are

2944

# available, the resource name typically ends with "pd-ssd". The

2945

# actual valid values are defined by the Google Compute Engine API,

2946

# not by the Cloud Dataflow API; consult the Google Compute Engine

2947

# documentation for more information about determining the set of

2948

# available disk types for a particular project and zone.

2949

#

2950

# Google Compute Engine Disk types are local to a particular

2951

# project in a particular zone, and so the resource name will

2952

# typically look something like this:

2953

#

2954

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2955

},

2956

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2957

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

2958

# attempt to choose a reasonable default.

2959

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

2960

# harness, residing in Google Container Registry.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

2961

},

2962

],

2963

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

2964

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

2965

# A description of the user pipeline and stages through which it is executed.

2966

# Created by Cloud Dataflow service. Only retrieved with

2967

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

2968

# form. This data is provided by the Dataflow service for ease of visualizing

2969

# the pipeline and interpreting Dataflow provided metrics.

2970

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

2971

{ # Description of the type, names/ids, and input/outputs for a transform.

2972

"kind": "A String", # Type of transform.

2973

"name": "A String", # User provided name for this transform instance.

2974

"inputCollectionName": [ # User names for all collection inputs to this transform.

2975

"A String",

2976

],

2977

"displayData": [ # Transform-specific display data.

2978

{ # Data provided with a pipeline or transform to provide descriptive info.

2979

"key": "A String", # The key identifying the display data.

2980

# This is intended to be used as a label for the display data

2981

# when viewed in a dax monitoring system.

2982

"shortStrValue": "A String", # A possible additional shorter value to display.

2983

# For example a java_class_name_value of com.mypackage.MyDoFn

2984

# will be stored with MyDoFn as the short_str_value and

2985

# com.mypackage.MyDoFn as the java_class_name value.

2986

# short_str_value can be displayed and java_class_name_value

2987

# will be displayed as a tooltip.

2988

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2989

"url": "A String", # An optional full URL.

2990

"floatValue": 3.14, # Contains value if the data is of float type.

2991

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2992

# language namespace (e.g. Python module) which defines the display data.

2993

# This allows a dax monitoring system to specially handle the data

2994

# and perform custom rendering.

2995

"javaClassValue": "A String", # Contains value if the data is of java class type.

2996

"label": "A String", # An optional label to display in a dax UI for the element.

2997

"boolValue": True or False, # Contains value if the data is of a boolean type.

2998

"strValue": "A String", # Contains value if the data is of string type.

2999

"durationValue": "A String", # Contains value if the data is of duration type.

3000

"int64Value": "A String", # Contains value if the data is of int64 type.

3001

},

3002

],

3003

"outputCollectionName": [ # User names for all collection outputs to this transform.

3004

"A String",

3005

],

3006

"id": "A String", # SDK generated id of this transform instance.

3007

},

3008

],

3009

"displayData": [ # Pipeline level display data.

3010

{ # Data provided with a pipeline or transform to provide descriptive info.

3011

"key": "A String", # The key identifying the display data.

3012

# This is intended to be used as a label for the display data

3013

# when viewed in a dax monitoring system.

3014

"shortStrValue": "A String", # A possible additional shorter value to display.

3015

# For example a java_class_name_value of com.mypackage.MyDoFn

3016

# will be stored with MyDoFn as the short_str_value and

3017

# com.mypackage.MyDoFn as the java_class_name value.

3018

# short_str_value can be displayed and java_class_name_value

3019

# will be displayed as a tooltip.

3020

"timestampValue": "A String", # Contains value if the data is of timestamp type.

3021

"url": "A String", # An optional full URL.

3022

"floatValue": 3.14, # Contains value if the data is of float type.

3023

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

3024

# language namespace (e.g. Python module) which defines the display data.

3025

# This allows a dax monitoring system to specially handle the data

3026

# and perform custom rendering.

3027

"javaClassValue": "A String", # Contains value if the data is of java class type.

3028

"label": "A String", # An optional label to display in a dax UI for the element.

3029

"boolValue": True or False, # Contains value if the data is of a boolean type.

3030

"strValue": "A String", # Contains value if the data is of string type.

3031

"durationValue": "A String", # Contains value if the data is of duration type.

3032

"int64Value": "A String", # Contains value if the data is of int64 type.

3033

},

3034

],

3035

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

3036

{ # Description of the composing transforms, names/ids, and input/outputs of a

3037

# stage of execution. Some composing transforms and sources may have been

3038

# generated by the Dataflow service during execution planning.

3039

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

3040

{ # Description of an interstitial value between transforms in an execution

3041

# stage.

3042

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

3043

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3044

# source is most closely associated.

3045

"name": "A String", # Dataflow service generated name for this source.

3046

},

3047

],

3048

"kind": "A String", # Type of transform this stage is executing.

3049

"name": "A String", # Dataflow service generated name for this stage.

3050

"outputSource": [ # Output sources for this stage.

3051

{ # Description of an input or output of an execution stage.

3052

"userName": "A String", # Human-readable name for this source; may be user or system generated.

3053

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3054

# source is most closely associated.

3055

"name": "A String", # Dataflow service generated name for this source.

3056

"sizeBytes": "A String", # Size of the source, if measurable.

3057

},

3058

],

3059

"inputSource": [ # Input sources for this stage.

3060

{ # Description of an input or output of an execution stage.

3061

"userName": "A String", # Human-readable name for this source; may be user or system generated.

3062

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3063

# source is most closely associated.

3064

"name": "A String", # Dataflow service generated name for this source.

3065

"sizeBytes": "A String", # Size of the source, if measurable.

3066

},

3067

],

3068

"componentTransform": [ # Transforms that comprise this execution stage.

3069

{ # Description of a transform executed as part of an execution stage.

3070

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

3071

"originalTransform": "A String", # User name for the original user transform with which this transform is

3072

# most closely associated.

3073

"name": "A String", # Dataflow service generated name for this source.

3074

},

3075

],

3076

"id": "A String", # Dataflow service generated id for this stage.

3077

},

3078

],

3079

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

3080

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

3081

{ # Defines a particular step within a Cloud Dataflow job.

3082

#

3083

# A job consists of multiple steps, each of which performs some

3084

# specific operation as part of the overall job. Data is typically

3085

# passed from one step to another as part of the job.

3086

#

3087

# Here's an example of a sequence of steps which together implement a

3088

# Map-Reduce job:

3089

#

3090

# * Read a collection of data from some source, parsing the

3091

# collection's elements.

3092

#

3093

# * Validate the elements.

3094

#

3095

# * Apply a user-defined function to map each element to some value

3096

# and extract an element-specific key value.

3097

#

3098

# * Group elements with the same key into a single element with

3099

# that key, transforming a multiply-keyed collection into a

3100

# uniquely-keyed collection.

3101

#

3102

# * Write the elements out to some data sink.

3103

#

3104

# Note that the Cloud Dataflow service may be used to run many different

3105

# types of jobs, not just Map-Reduce.

3106

"kind": "A String", # The kind of step in the Cloud Dataflow job.

3107

"properties": { # Named properties associated with the step. Each kind of

3108

# predefined step has its own required set of properties.

3109

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

3110

"a_key": "", # Properties of the object.

3111

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

3112

"name": "A String", # The name that identifies the step. This must be unique for each

3113

# step with respect to all other steps in the Cloud Dataflow job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

3114

},

3115

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

3116

"currentState": "A String", # The current state of the job.

3117

#

3118

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

3119

# specified.

3120

#

3121

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

3122

# terminal state. After a job has reached a terminal state, no

3123

# further state updates may be made.

3124

#

3125

# This field may be mutated by the Cloud Dataflow service;

3126

# callers cannot mutate it.

3127

"tempFiles": [ # A set of files the system should be aware of that are used

3128

# for temporary storage. These temporary files will be

3129

# removed on job completion.

3130

# No duplicates are allowed.

3131

# No file patterns are supported.

3132

#

3133

# The supported files are:

3134

#

3135

# Google Cloud Storage:

3136

#

3137

# storage.googleapis.com/{bucket}/{object}

3138

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-02-19 16:02:29 -0800

[diff] [blame]

3139

"A String",

3140

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

3141

"type": "A String", # The type of Cloud Dataflow job.

3142

"id": "A String", # The unique ID of this job.

3143

#

3144

# This field is set by the Cloud Dataflow service when the Job is

3145

# created, and is immutable for the life of the job.

3146

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

3147

# of the job it replaced.

3148

#

3149

# When sending a `CreateJobRequest`, you can update a job by specifying it

3150

# here. The job named here is stopped, and its intermediate state is

3151

# transferred to this job.

3152

"executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be executed that

3153

# isn't contained in the submitted job.

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

3154

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame^]

3155

"a_key": { # Contains information about how a particular

3156

# google.dataflow.v1beta3.Step will be executed.

3157

"stepName": [ # The steps associated with the execution stage.

3158

# Note that stages may have several steps, and that a given step

3159

# might be run by more than one stage.

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

3160

"A String",

3161

],

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

3162

},

Nathaniel Manista

2015-06-15 16:44:50 +0000

[diff] [blame]

3163

},

3164

},

Takashi Matsuo

2015-09-11 13:55:40 -0700

[diff] [blame]

3165

}</pre>

Nathaniel Manista