Blame - docs/dyn/dataflow_v1b3.projects.locations.jobs.html - platform/external/python/google-api-python-client

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>

76

<h2>Instance Methods</h2>

77

78

<code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>

79

</p>

80

<p class="firstline">Returns the messages Resource.</p>

81

82

83

<code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>

84

</p>

85

<p class="firstline">Returns the workItems Resource.</p>

86

87

88

<code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

89

<p class="firstline">Creates a Cloud Dataflow job.</p>

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

90

91

<code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

92

<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

93

94

<code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>

95

<p class="firstline">Request the job status.</p>

96

97

<code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

98

<p class="firstline">List the jobs of a project.</p>

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

99

100

<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>

101

<p class="firstline">Retrieves the next page of results.</p>

102

103

<code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

104

<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

105

<h3>Method Details</h3>

106

107

<code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

108

<pre>Creates a Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

109

110

Args:

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

111

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

112

location: string, The location that contains this job. (required)

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

113

body: object, The request body. (required)

114

The object takes the form of:

115

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

116

{ # Defines a job to be run by the Cloud Dataflow service.

117

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

118

# If this field is set, the service will ensure its uniqueness.

119

# The request to create a job will fail if the service has knowledge of a

120

# previously submitted job with the same client's ID and job name.

121

# The caller may use this field to ensure idempotence of job

122

# creation across retried attempts to create a job.

123

# By default, the field is empty and, in that case, the service ignores it.

124

"requestedState": "A String", # The job's requested state.

125

#

126

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

127

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

128

# also be used to directly set a job's requested state to

129

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

130

# job if it has not already reached a terminal state.

131

"name": "A String", # The user-specified Cloud Dataflow job name.

132

#

133

# Only one Job with a given name may exist in a project at any

134

# given time. If a caller attempts to create a Job with the same

135

# name as an already-existing Job, the attempt returns the

136

# existing Job.

137

#

138

# The name must match the regular expression

139

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

140

"currentStateTime": "A String", # The timestamp associated with the current state.

141

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

142

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

143

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

144

"labels": { # User-defined labels for this job.

145

#

146

# The labels map can contain no more than 64 entries. Entries of the labels

147

# map are UTF8 strings that comply with the following restrictions:

148

#

149

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

150

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

151

# * Both keys and values are additionally constrained to be <= 128 bytes in

152

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

153

"a_key": "A String",

154

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

155

"location": "A String", # The location that contains this job.

156

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

157

# Cloud Dataflow service.

158

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

159

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

160

"a_key": "A String",

161

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

162

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

163

"version": { # A structure describing which components and their versions of the service

164

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

165

"a_key": "", # Properties of the object.

166

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

167

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

168

# storage. The system will append the suffix "/temp-{JOBNAME}" to

169

# this resource prefix, where {JOBNAME} is the value of the

170

# job_name field. The resulting bucket and object prefix is used

171

# as the prefix of the resources used to store temporary data

172

# needed during the job execution. NOTE: This will override the

173

# value in taskrunner_settings.

174

# The supported resource type is:

175

#

176

# Google Cloud Storage:

177

#

178

# storage.googleapis.com/{bucket}/{object}

179

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

180

"internalExperiments": { # Experimental settings.

181

"a_key": "", # Properties of the object. Contains field @type with type URL.

182

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

183

"dataset": "A String", # The dataset for the current project where various workflow

184

# related tables are stored.

185

#

186

# The supported resource type is:

187

#

188

# Google BigQuery:

189

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

190

"experiments": [ # The list of experiments to enable.

191

"A String",

192

],

193

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

194

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

195

# options are passed through the service and are used to recreate the

196

# SDK pipeline options on the worker in a language agnostic and platform

197

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

198

"a_key": "", # Properties of the object.

199

},

200

"userAgent": { # A description of the process that generated the request.

201

"a_key": "", # Properties of the object.

202

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

203

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

204

# unspecified, the service will attempt to choose a reasonable

205

# default. This should be in the form of the API service name,

206

# e.g. "compute.googleapis.com".

207

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

208

# specified in order for the job to have workers.

209

{ # Describes one particular pool of Cloud Dataflow workers to be

210

# instantiated by the Cloud Dataflow service in order to perform the

211

# computations required by a job. Note that a workflow job may use

212

# multiple pools, in order to match the various computational

213

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

214

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

215

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

216

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

217

# are supported.

218

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

219

# service will attempt to choose a reasonable default.

220

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

221

# the service will use the network "default".

222

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

223

# will attempt to choose a reasonable default.

224

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

225

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

226

"metadata": { # Metadata to set on the Google Compute Engine VMs.

227

"a_key": "A String",

228

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

229

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

230

# Compute Engine API.

231

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

232

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

233

# `TEARDOWN_NEVER`.

234

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

235

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

236

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

237

# down.

238

#

239

# If the workers are not torn down by the service, they will

240

# continue to run and use Google Compute Engine VM resources in the

241

# user's project until they are explicitly terminated by the user.

242

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

243

# policy except for small, manually supervised test jobs.

244

#

245

# If unknown or unspecified, the service will attempt to choose a reasonable

246

# default.

247

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

248

# service will choose a number of threads (according to the number of cores

249

# on the selected machine type for batch, or 1 by convention for streaming).

250

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

251

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

252

"poolArgs": { # Extra arguments for this worker pool.

253

"a_key": "", # Properties of the object. Contains field @type with type URL.

254

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

255

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

256

# execute the job. If zero or unspecified, the service will

257

# attempt to choose a reasonable default.

258

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

259

# using the standard Dataflow task runner. Users should ignore

260

# this field.

261

"workflowFileName": "A String", # The file to store the workflow in.

262

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

263

# will not be uploaded.

264

#

265

# The supported resource type is:

266

#

267

# Google Cloud Storage:

268

# storage.googleapis.com/{bucket}/{object}

269

# bucket.storage.googleapis.com/{object}

270

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

271

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

272

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

273

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

274

"vmId": "A String", # The ID string of the VM.

275

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

276

# taskrunner; e.g. "wheel".

277

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

278

# taskrunner; e.g. "root".

279

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

280

# access the Cloud Dataflow API.

281

"A String",

282

],

283

"languageHint": "A String", # The suggested backend language.

284

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

285

# console.

286

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

287

"logDir": "A String", # The directory on the VM to store logs.

288

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

289

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

290

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

291

# "shuffle/v1beta1".

292

"workerId": "A String", # The ID of the worker running this pipeline.

293

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

294

#

295

# When workers access Google Cloud APIs, they logically do so via

296

# relative URLs. If this field is specified, it supplies the base

297

# URL to use for resolving these relative URLs. The normative

298

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

299

# Locators".

300

#

301

# If not specified, the default value is "http://www.googleapis.com/"

302

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

303

# "dataflow/v1b3/projects".

304

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

305

# storage.

306

#

307

# The supported resource type is:

308

#

309

# Google Cloud Storage:

310

#

311

# storage.googleapis.com/{bucket}/{object}

312

# bucket.storage.googleapis.com/{object}

313

},

314

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

315

"harnessCommand": "A String", # The command to launch the worker harness.

316

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

317

# temporary storage.

318

#

319

# The supported resource type is:

320

#

321

# Google Cloud Storage:

322

# storage.googleapis.com/{bucket}/{object}

323

# bucket.storage.googleapis.com/{object}

324

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

325

#

326

# When workers access Google Cloud APIs, they logically do so via

327

# relative URLs. If this field is specified, it supplies the base

328

# URL to use for resolving these relative URLs. The normative

329

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

330

# Locators".

331

#

332

# If not specified, the default value is "http://www.googleapis.com/"

333

},

334

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

335

# select a default set of packages which are useful to worker

336

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

337

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

338

{ # The packages that must be installed in order for a worker to run the

339

# steps of the Cloud Dataflow job that will be assigned to its worker

340

# pool.

341

#

342

# This is the mechanism by which the Cloud Dataflow SDK causes code to

343

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

344

# might use this to install jars containing the user's code and all of the

345

# various dependencies (libraries, data files, etc.) required in order

346

# for that code to run.

347

"location": "A String", # The resource to read the package from. The supported resource type is:

348

#

349

# Google Cloud Storage:

350

#

351

# storage.googleapis.com/{bucket}

352

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

353

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

354

},

355

],

356

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

357

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

358

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

359

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

360

"dataDisks": [ # Data disks that are used by a VM in this workflow.

361

{ # Describes the data disk used by a workflow job.

362

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

363

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

364

# attempt to choose a reasonable default.

365

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

366

# must be a disk type appropriate to the project and zone in which

367

# the workers will run. If unknown or unspecified, the service

368

# will attempt to choose a reasonable default.

369

#

370

# For example, the standard persistent disk type is a resource name

371

# typically ending in "pd-standard". If SSD persistent disks are

372

# available, the resource name typically ends with "pd-ssd". The

373

# actual valid values are defined by the Google Compute Engine API,

374

# not by the Cloud Dataflow API; consult the Google Compute Engine

375

# documentation for more information about determining the set of

376

# available disk types for a particular project and zone.

377

#

378

# Google Compute Engine Disk types are local to a particular

379

# project in a particular zone, and so the resource name will

380

# typically look something like this:

381

#

382

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

383

},

384

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

385

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

386

# attempt to choose a reasonable default.

387

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

388

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

389

},

390

],

391

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

392

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

393

# A description of the user pipeline and stages through which it is executed.

394

# Created by Cloud Dataflow service. Only retrieved with

395

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

396

# form. This data is provided by the Dataflow service for ease of visualizing

397

# the pipeline and interpreting Dataflow provided metrics.

398

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

399

{ # Description of the type, names/ids, and input/outputs for a transform.

400

"kind": "A String", # Type of transform.

401

"name": "A String", # User provided name for this transform instance.

402

"inputCollectionName": [ # User names for all collection inputs to this transform.

403

"A String",

404

],

405

"displayData": [ # Transform-specific display data.

406

{ # Data provided with a pipeline or transform to provide descriptive info.

407

"key": "A String", # The key identifying the display data.

408

# This is intended to be used as a label for the display data

409

# when viewed in a dax monitoring system.

410

"shortStrValue": "A String", # A possible additional shorter value to display.

411

# For example a java_class_name_value of com.mypackage.MyDoFn

412

# will be stored with MyDoFn as the short_str_value and

413

# com.mypackage.MyDoFn as the java_class_name value.

414

# short_str_value can be displayed and java_class_name_value

415

# will be displayed as a tooltip.

416

"timestampValue": "A String", # Contains value if the data is of timestamp type.

417

"url": "A String", # An optional full URL.

418

"floatValue": 3.14, # Contains value if the data is of float type.

419

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

420

# language namespace (i.e. python module) which defines the display data.

421

# This allows a dax monitoring system to specially handle the data

422

# and perform custom rendering.

423

"javaClassValue": "A String", # Contains value if the data is of java class type.

424

"label": "A String", # An optional label to display in a dax UI for the element.

425

"boolValue": True or False, # Contains value if the data is of a boolean type.

426

"strValue": "A String", # Contains value if the data is of string type.

427

"durationValue": "A String", # Contains value if the data is of duration type.

428

"int64Value": "A String", # Contains value if the data is of int64 type.

429

},

430

],

431

"outputCollectionName": [ # User names for all collection outputs to this transform.

432

"A String",

433

],

434

"id": "A String", # SDK generated id of this transform instance.

435

},

436

],

437

"displayData": [ # Pipeline level display data.

438

{ # Data provided with a pipeline or transform to provide descriptive info.

439

"key": "A String", # The key identifying the display data.

440

# This is intended to be used as a label for the display data

441

# when viewed in a dax monitoring system.

442

"shortStrValue": "A String", # A possible additional shorter value to display.

443

# For example a java_class_name_value of com.mypackage.MyDoFn

444

# will be stored with MyDoFn as the short_str_value and

445

# com.mypackage.MyDoFn as the java_class_name value.

446

# short_str_value can be displayed and java_class_name_value

447

# will be displayed as a tooltip.

448

"timestampValue": "A String", # Contains value if the data is of timestamp type.

449

"url": "A String", # An optional full URL.

450

"floatValue": 3.14, # Contains value if the data is of float type.

451

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

452

# language namespace (i.e. python module) which defines the display data.

453

# This allows a dax monitoring system to specially handle the data

454

# and perform custom rendering.

455

"javaClassValue": "A String", # Contains value if the data is of java class type.

456

"label": "A String", # An optional label to display in a dax UI for the element.

457

"boolValue": True or False, # Contains value if the data is of a boolean type.

458

"strValue": "A String", # Contains value if the data is of string type.

459

"durationValue": "A String", # Contains value if the data is of duration type.

460

"int64Value": "A String", # Contains value if the data is of int64 type.

461

},

462

],

463

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

464

{ # Description of the composing transforms, names/ids, and input/outputs of a

465

# stage of execution. Some composing transforms and sources may have been

466

# generated by the Dataflow service during execution planning.

467

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

468

{ # Description of an interstitial value between transforms in an execution

469

# stage.

470

"userName": "A String", # Human-readable name for this source; may be user or system generated.

471

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

472

# source is most closely associated.

473

"name": "A String", # Dataflow service generated name for this source.

474

},

475

],

476

"kind": "A String", # Type of transform this stage is executing.

477

"name": "A String", # Dataflow service generated name for this stage.

478

"outputSource": [ # Output sources for this stage.

479

{ # Description of an input or output of an execution stage.

480

"userName": "A String", # Human-readable name for this source; may be user or system generated.

481

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

482

# source is most closely associated.

483

"name": "A String", # Dataflow service generated name for this source.

484

"sizeBytes": "A String", # Size of the source, if measurable.

485

},

486

],

487

"inputSource": [ # Input sources for this stage.

488

{ # Description of an input or output of an execution stage.

489

"userName": "A String", # Human-readable name for this source; may be user or system generated.

490

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

491

# source is most closely associated.

492

"name": "A String", # Dataflow service generated name for this source.

493

"sizeBytes": "A String", # Size of the source, if measurable.

494

},

495

],

496

"componentTransform": [ # Transforms that comprise this execution stage.

497

{ # Description of a transform executed as part of an execution stage.

498

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

499

"originalTransform": "A String", # User name for the original user transform with which this transform is

500

# most closely associated.

501

"name": "A String", # Dataflow service generated name for this transform.

502

},

503

],

504

"id": "A String", # Dataflow service generated id for this stage.

505

},

506

],

507

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

508

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

509

{ # Defines a particular step within a Cloud Dataflow job.

510

#

511

# A job consists of multiple steps, each of which performs some

512

# specific operation as part of the overall job. Data is typically

513

# passed from one step to another as part of the job.

514

#

515

# Here's an example of a sequence of steps which together implement a

516

# Map-Reduce job:

517

#

518

# * Read a collection of data from some source, parsing the

519

# collection's elements.

520

#

521

# * Validate the elements.

522

#

523

# * Apply a user-defined function to map each element to some value

524

# and extract an element-specific key value.

525

#

526

# * Group elements with the same key into a single element with

527

# that key, transforming a multiply-keyed collection into a

528

# uniquely-keyed collection.

529

#

530

# * Write the elements out to some data sink.

531

#

532

# Note that the Cloud Dataflow service may be used to run many different

533

# types of jobs, not just Map-Reduce.

534

"kind": "A String", # The kind of step in the Cloud Dataflow job.

535

"properties": { # Named properties associated with the step. Each kind of

536

# predefined step has its own required set of properties.

537

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

538

"a_key": "", # Properties of the object.

539

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

540

"name": "A String", # The name that identifies the step. This must be unique for each

541

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

542

},

543

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

544

"currentState": "A String", # The current state of the job.

545

#

546

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

547

# specified.

548

#

549

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

550

# terminal state. After a job has reached a terminal state, no

551

# further state updates may be made.

552

#

553

# This field may be mutated by the Cloud Dataflow service;

554

# callers cannot mutate it.

555

"tempFiles": [ # A set of files the system should be aware of that are used

556

# for temporary storage. These temporary files will be

557

# removed on job completion.

558

# No duplicates are allowed.

559

# No file patterns are supported.

560

#

561

# The supported files are:

562

#

563

# Google Cloud Storage:

564

#

565

# storage.googleapis.com/{bucket}/{object}

566

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

567

"A String",

568

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

569

"type": "A String", # The type of Cloud Dataflow job.

570

"id": "A String", # The unique ID of this job.

571

#

572

# This field is set by the Cloud Dataflow service when the Job is

573

# created, and is immutable for the life of the job.

574

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

575

# of the job it replaced.

576

#

577

# When sending a `CreateJobRequest`, you can update a job by specifying it

578

# here. The job named here is stopped, and its intermediate state is

579

# transferred to this job.

580

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

581

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

582

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

583

"a_key": { # Contains information about how a particular

584

# google.dataflow.v1beta3.Step will be executed.

585

"stepName": [ # The steps associated with the execution stage.

586

# Note that stages may have several steps, and that a given step

587

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

}

x__xgafv: string, V1 error format.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

replaceJobId: string, Deprecated. This field is now in the Job message.

600

view: string, The level of information requested in response.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

601

602

Returns:

603

An object of the form:

604

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

605

{ # Defines a job to be run by the Cloud Dataflow service.

606

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

607

# If this field is set, the service will ensure its uniqueness.

608

# The request to create a job will fail if the service has knowledge of a

609

# previously submitted job with the same client's ID and job name.

610

# The caller may use this field to ensure idempotence of job

611

# creation across retried attempts to create a job.

612

# By default, the field is empty and, in that case, the service ignores it.

613

"requestedState": "A String", # The job's requested state.

614

#

615

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

616

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

617

# also be used to directly set a job's requested state to

618

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

619

# job if it has not already reached a terminal state.

620

"name": "A String", # The user-specified Cloud Dataflow job name.

621

#

622

# Only one Job with a given name may exist in a project at any

623

# given time. If a caller attempts to create a Job with the same

624

# name as an already-existing Job, the attempt returns the

625

# existing Job.

626

#

627

# The name must match the regular expression

628

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

629

"currentStateTime": "A String", # The timestamp associated with the current state.

630

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

631

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

632

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

633

"labels": { # User-defined labels for this job.

634

#

635

# The labels map can contain no more than 64 entries. Entries of the labels

636

# map are UTF8 strings that comply with the following restrictions:

637

#

638

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

639

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

640

# * Both keys and values are additionally constrained to be <= 128 bytes in

641

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

642

"a_key": "A String",

643

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

644

"location": "A String", # The location that contains this job.

645

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

646

# Cloud Dataflow service.

647

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

648

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

649

"a_key": "A String",

650

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

651

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

652

"version": { # A structure describing which components and their versions of the service

653

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

654

"a_key": "", # Properties of the object.

655

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

656

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

657

# storage. The system will append the suffix "/temp-{JOBNAME}" to

658

# this resource prefix, where {JOBNAME} is the value of the

659

# job_name field. The resulting bucket and object prefix is used

660

# as the prefix of the resources used to store temporary data

661

# needed during the job execution. NOTE: This will override the

662

# value in taskrunner_settings.

663

# The supported resource type is:

664

#

665

# Google Cloud Storage:

666

#

667

# storage.googleapis.com/{bucket}/{object}

668

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

669

"internalExperiments": { # Experimental settings.

670

"a_key": "", # Properties of the object. Contains field @type with type URL.

671

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

672

"dataset": "A String", # The dataset for the current project where various workflow

673

# related tables are stored.

674

#

675

# The supported resource type is:

676

#

677

# Google BigQuery:

678

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

679

"experiments": [ # The list of experiments to enable.

680

"A String",

681

],

682

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

683

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

684

# options are passed through the service and are used to recreate the

685

# SDK pipeline options on the worker in a language agnostic and platform

686

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

687

"a_key": "", # Properties of the object.

688

},

689

"userAgent": { # A description of the process that generated the request.

690

"a_key": "", # Properties of the object.

691

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

692

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

693

# unspecified, the service will attempt to choose a reasonable

694

# default. This should be in the form of the API service name,

695

# e.g. "compute.googleapis.com".

696

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

697

# specified in order for the job to have workers.

698

{ # Describes one particular pool of Cloud Dataflow workers to be

699

# instantiated by the Cloud Dataflow service in order to perform the

700

# computations required by a job. Note that a workflow job may use

701

# multiple pools, in order to match the various computational

702

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

703

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

704

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

705

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

706

# are supported.

707

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

708

# service will attempt to choose a reasonable default.

709

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

710

# the service will use the network "default".

711

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

712

# will attempt to choose a reasonable default.

713

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

714

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

715

"metadata": { # Metadata to set on the Google Compute Engine VMs.

716

"a_key": "A String",

717

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

718

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

719

# Compute Engine API.

720

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

721

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

722

# `TEARDOWN_NEVER`.

723

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

724

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

725

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

726

# down.

727

#

728

# If the workers are not torn down by the service, they will

729

# continue to run and use Google Compute Engine VM resources in the

730

# user's project until they are explicitly terminated by the user.

731

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

732

# policy except for small, manually supervised test jobs.

733

#

734

# If unknown or unspecified, the service will attempt to choose a reasonable

735

# default.

736

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

737

# service will choose a number of threads (according to the number of cores

738

# on the selected machine type for batch, or 1 by convention for streaming).

739

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

740

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

741

"poolArgs": { # Extra arguments for this worker pool.

742

"a_key": "", # Properties of the object. Contains field @type with type URL.

743

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

744

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

745

# execute the job. If zero or unspecified, the service will

746

# attempt to choose a reasonable default.

747

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

748

# using the standard Dataflow task runner. Users should ignore

749

# this field.

750

"workflowFileName": "A String", # The file to store the workflow in.

751

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

752

# will not be uploaded.

753

#

754

# The supported resource type is:

755

#

756

# Google Cloud Storage:

757

# storage.googleapis.com/{bucket}/{object}

758

# bucket.storage.googleapis.com/{object}

759

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

760

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

761

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

762

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

763

"vmId": "A String", # The ID string of the VM.

764

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

765

# taskrunner; e.g. "wheel".

766

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

767

# taskrunner; e.g. "root".

768

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

769

# access the Cloud Dataflow API.

770

"A String",

771

],

772

"languageHint": "A String", # The suggested backend language.

773

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

774

# console.

775

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

776

"logDir": "A String", # The directory on the VM to store logs.

777

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

778

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

779

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

780

# "shuffle/v1beta1".

781

"workerId": "A String", # The ID of the worker running this pipeline.

782

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

783

#

784

# When workers access Google Cloud APIs, they logically do so via

785

# relative URLs. If this field is specified, it supplies the base

786

# URL to use for resolving these relative URLs. The normative

787

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

788

# Locators".

789

#

790

# If not specified, the default value is "http://www.googleapis.com/"

791

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

792

# "dataflow/v1b3/projects".

793

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

794

# storage.

795

#

796

# The supported resource type is:

797

#

798

# Google Cloud Storage:

799

#

800

# storage.googleapis.com/{bucket}/{object}

801

# bucket.storage.googleapis.com/{object}

802

},

803

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

804

"harnessCommand": "A String", # The command to launch the worker harness.

805

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

806

# temporary storage.

807

#

808

# The supported resource type is:

809

#

810

# Google Cloud Storage:

811

# storage.googleapis.com/{bucket}/{object}

812

# bucket.storage.googleapis.com/{object}

813

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

814

#

815

# When workers access Google Cloud APIs, they logically do so via

816

# relative URLs. If this field is specified, it supplies the base

817

# URL to use for resolving these relative URLs. The normative

818

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

819

# Locators".

820

#

821

# If not specified, the default value is "http://www.googleapis.com/"

822

},

823

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

824

# select a default set of packages which are useful to worker

825

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

826

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

827

{ # The packages that must be installed in order for a worker to run the

828

# steps of the Cloud Dataflow job that will be assigned to its worker

829

# pool.

830

#

831

# This is the mechanism by which the Cloud Dataflow SDK causes code to

832

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

833

# might use this to install jars containing the user's code and all of the

834

# various dependencies (libraries, data files, etc.) required in order

835

# for that code to run.

836

"location": "A String", # The resource to read the package from. The supported resource type is:

837

#

838

# Google Cloud Storage:

839

#

840

# storage.googleapis.com/{bucket}

841

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

842

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

843

},

844

],

845

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

846

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

847

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

848

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

849

"dataDisks": [ # Data disks that are used by a VM in this workflow.

850

{ # Describes the data disk used by a workflow job.

851

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

852

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

853

# attempt to choose a reasonable default.

854

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

855

# must be a disk type appropriate to the project and zone in which

856

# the workers will run. If unknown or unspecified, the service

857

# will attempt to choose a reasonable default.

858

#

859

# For example, the standard persistent disk type is a resource name

860

# typically ending in "pd-standard". If SSD persistent disks are

861

# available, the resource name typically ends with "pd-ssd". The

862

# actual valid values are defined by the Google Compute Engine API,

863

# not by the Cloud Dataflow API; consult the Google Compute Engine

864

# documentation for more information about determining the set of

865

# available disk types for a particular project and zone.

866

#

867

# Google Compute Engine Disk types are local to a particular

868

# project in a particular zone, and so the resource name will

869

# typically look something like this:

870

#

871

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

872

},

873

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

874

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

875

# attempt to choose a reasonable default.

876

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

877

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

878

},

879

],

880

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

881

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

882

# A description of the user pipeline and stages through which it is executed.

883

# Created by Cloud Dataflow service. Only retrieved with

884

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

885

# form. This data is provided by the Dataflow service for ease of visualizing

886

# the pipeline and interpreting Dataflow provided metrics.

887

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

888

{ # Description of the type, names/ids, and input/outputs for a transform.

889

"kind": "A String", # Type of transform.

890

"name": "A String", # User provided name for this transform instance.

891

"inputCollectionName": [ # User names for all collection inputs to this transform.

892

"A String",

893

],

894

"displayData": [ # Transform-specific display data.

895

{ # Data provided with a pipeline or transform to provide descriptive info.

896

"key": "A String", # The key identifying the display data.

897

# This is intended to be used as a label for the display data

898

# when viewed in a dax monitoring system.

899

"shortStrValue": "A String", # A possible additional shorter value to display.

900

# For example a java_class_name_value of com.mypackage.MyDoFn

901

# will be stored with MyDoFn as the short_str_value and

902

# com.mypackage.MyDoFn as the java_class_name value.

903

# short_str_value can be displayed and java_class_name_value

904

# will be displayed as a tooltip.

905

"timestampValue": "A String", # Contains value if the data is of timestamp type.

906

"url": "A String", # An optional full URL.

907

"floatValue": 3.14, # Contains value if the data is of float type.

908

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

909

# language namespace (e.g. a Python module) which defines the display data.

910

# This allows a dax monitoring system to specially handle the data

911

# and perform custom rendering.

912

"javaClassValue": "A String", # Contains value if the data is of java class type.

913

"label": "A String", # An optional label to display in a dax UI for the element.

914

"boolValue": True or False, # Contains value if the data is of a boolean type.

915

"strValue": "A String", # Contains value if the data is of string type.

916

"durationValue": "A String", # Contains value if the data is of duration type.

917

"int64Value": "A String", # Contains value if the data is of int64 type.

918

},

919

],

920

"outputCollectionName": [ # User names for all collection outputs to this transform.

921

"A String",

922

],

923

"id": "A String", # SDK generated id of this transform instance.

924

},

925

],

926

"displayData": [ # Pipeline level display data.

927

{ # Data provided with a pipeline or transform to provide descriptive info.

928

"key": "A String", # The key identifying the display data.

929

# This is intended to be used as a label for the display data

930

# when viewed in a dax monitoring system.

931

"shortStrValue": "A String", # A possible additional shorter value to display.

932

# For example a java_class_name_value of com.mypackage.MyDoFn

933

# will be stored with MyDoFn as the short_str_value and

934

# com.mypackage.MyDoFn as the java_class_name value.

935

# short_str_value can be displayed and java_class_name_value

936

# will be displayed as a tooltip.

937

"timestampValue": "A String", # Contains value if the data is of timestamp type.

938

"url": "A String", # An optional full URL.

939

"floatValue": 3.14, # Contains value if the data is of float type.

940

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

941

# language namespace (e.g. a Python module) which defines the display data.

942

# This allows a dax monitoring system to specially handle the data

943

# and perform custom rendering.

944

"javaClassValue": "A String", # Contains value if the data is of java class type.

945

"label": "A String", # An optional label to display in a dax UI for the element.

946

"boolValue": True or False, # Contains value if the data is of a boolean type.

947

"strValue": "A String", # Contains value if the data is of string type.

948

"durationValue": "A String", # Contains value if the data is of duration type.

949

"int64Value": "A String", # Contains value if the data is of int64 type.

950

},

951

],

952

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

953

{ # Description of the composing transforms, names/ids, and input/outputs of a

954

# stage of execution. Some composing transforms and sources may have been

955

# generated by the Dataflow service during execution planning.

956

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

957

{ # Description of an interstitial value between transforms in an execution

958

# stage.

959

"userName": "A String", # Human-readable name for this source; may be user or system generated.

960

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

961

# source is most closely associated.

962

"name": "A String", # Dataflow service generated name for this source.

963

},

964

],

965

"kind": "A String", # Type of transform this stage is executing.

966

"name": "A String", # Dataflow service generated name for this stage.

967

"outputSource": [ # Output sources for this stage.

968

{ # Description of an input or output of an execution stage.

969

"userName": "A String", # Human-readable name for this source; may be user or system generated.

970

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

971

# source is most closely associated.

972

"name": "A String", # Dataflow service generated name for this source.

973

"sizeBytes": "A String", # Size of the source, if measurable.

974

},

975

],

976

"inputSource": [ # Input sources for this stage.

977

{ # Description of an input or output of an execution stage.

978

"userName": "A String", # Human-readable name for this source; may be user or system generated.

979

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

980

# source is most closely associated.

981

"name": "A String", # Dataflow service generated name for this source.

982

"sizeBytes": "A String", # Size of the source, if measurable.

983

},

984

],

985

"componentTransform": [ # Transforms that comprise this execution stage.

986

{ # Description of a transform executed as part of an execution stage.

987

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

988

"originalTransform": "A String", # User name for the original user transform with which this transform is

989

# most closely associated.

990

"name": "A String", # Dataflow service generated name for this transform.

991

},

992

],

993

"id": "A String", # Dataflow service generated id for this stage.

994

},

995

],

996

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

997

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

998

{ # Defines a particular step within a Cloud Dataflow job.

999

#

1000

# A job consists of multiple steps, each of which performs some

1001

# specific operation as part of the overall job. Data is typically

1002

# passed from one step to another as part of the job.

1003

#

1004

# Here's an example of a sequence of steps which together implement a

1005

# Map-Reduce job:

1006

#

1007

# * Read a collection of data from some source, parsing the

1008

# collection's elements.

1009

#

1010

# * Validate the elements.

1011

#

1012

# * Apply a user-defined function to map each element to some value

1013

# and extract an element-specific key value.

1014

#

1015

# * Group elements with the same key into a single element with

1016

# that key, transforming a multiply-keyed collection into a

1017

# uniquely-keyed collection.

1018

#

1019

# * Write the elements out to some data sink.

1020

#

1021

# Note that the Cloud Dataflow service may be used to run many different

1022

# types of jobs, not just Map-Reduce.

1023

"kind": "A String", # The kind of step in the Cloud Dataflow job.

1024

"properties": { # Named properties associated with the step. Each kind of

1025

# predefined step has its own required set of properties.

1026

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1027

"a_key": "", # Properties of the object.

1028

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1029

"name": "A String", # The name that identifies the step. This must be unique for each

1030

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1031

},

1032

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1033

"currentState": "A String", # The current state of the job.

1034

#

1035

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1036

# specified.

1037

#

1038

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1039

# terminal state. After a job has reached a terminal state, no

1040

# further state updates may be made.

1041

#

1042

# This field may be mutated by the Cloud Dataflow service;

1043

# callers cannot mutate it.

1044

"tempFiles": [ # A set of files the system should be aware of that are used

1045

# for temporary storage. These temporary files will be

1046

# removed on job completion.

1047

# No duplicates are allowed.

1048

# No file patterns are supported.

1049

#

1050

# The supported files are:

1051

#

1052

# Google Cloud Storage:

1053

#

1054

# storage.googleapis.com/{bucket}/{object}

1055

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1056

"A String",

1057

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1058

"type": "A String", # The type of Cloud Dataflow job.

1059

"id": "A String", # The unique ID of this job.

1060

#

1061

# This field is set by the Cloud Dataflow service when the Job is

1062

# created, and is immutable for the life of the job.

1063

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1064

# of the job it replaced.

1065

#

1066

# When sending a `CreateJobRequest`, you can update a job by specifying it

1067

# here. The job named here is stopped, and its intermediate state is

1068

# transferred to this job.

1069

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1070

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1071

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1072

"a_key": { # Contains information about how a particular

1073

# google.dataflow.v1beta3.Step will be executed.

1074

"stepName": [ # The steps associated with the execution stage.

1075

# Note that stages may have several steps, and that a given step

1076

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

}</pre>

</div>

<code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1087

<pre>Gets the state of the specified Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1088

1089

Args:

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1090

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

1091

location: string, The location that contains this job. (required)

1092

jobId: string, The job ID. (required)

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1093

x__xgafv: string, V1 error format.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

view: string, The level of information requested in response.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1098

1099

Returns:

1100

An object of the form:

1101

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1102

{ # Defines a job to be run by the Cloud Dataflow service.

1103

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

1104

# If this field is set, the service will ensure its uniqueness.

1105

# The request to create a job will fail if the service has knowledge of a

1106

# previously submitted job with the same client's ID and job name.

1107

# The caller may use this field to ensure idempotence of job

1108

# creation across retried attempts to create a job.

1109

# By default, the field is empty and, in that case, the service ignores it.

1110

"requestedState": "A String", # The job's requested state.

1111

#

1112

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1113

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1114

# also be used to directly set a job's requested state to

1115

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1116

# job if it has not already reached a terminal state.

1117

"name": "A String", # The user-specified Cloud Dataflow job name.

1118

#

1119

# Only one Job with a given name may exist in a project at any

1120

# given time. If a caller attempts to create a Job with the same

1121

# name as an already-existing Job, the attempt returns the

1122

# existing Job.

1123

#

1124

# The name must match the regular expression

1125

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

1126

"currentStateTime": "A String", # The timestamp associated with the current state.

1127

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1128

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1129

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1130

"labels": { # User-defined labels for this job.

1131

#

1132

# The labels map can contain no more than 64 entries. Entries of the labels

1133

# map are UTF8 strings that comply with the following restrictions:

1134

#

1135

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

1136

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

1137

# * Both keys and values are additionally constrained to be <= 128 bytes in

1138

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1139

"a_key": "A String",

1140

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1141

"location": "A String", # The location that contains this job.

1142

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1143

# Cloud Dataflow service.

1144

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

1145

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1146

"a_key": "A String",

1147

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1148

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

1149

"version": { # A structure describing which components and their versions of the service

1150

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1151

"a_key": "", # Properties of the object.

1152

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1153

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1154

# storage. The system will append the suffix "/temp-{JOBNAME}" to

1155

# this resource prefix, where {JOBNAME} is the value of the

1156

# job_name field. The resulting bucket and object prefix is used

1157

# as the prefix of the resources used to store temporary data

1158

# needed during the job execution. NOTE: This will override the

1159

# value in taskrunner_settings.

1160

# The supported resource type is:

1161

#

1162

# Google Cloud Storage:

1163

#

1164

# storage.googleapis.com/{bucket}/{object}

1165

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1166

"internalExperiments": { # Experimental settings.

1167

"a_key": "", # Properties of the object. Contains field @type with type URL.

1168

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1169

"dataset": "A String", # The dataset for the current project where various workflow

1170

# related tables are stored.

1171

#

1172

# The supported resource type is:

1173

#

1174

# Google BigQuery:

1175

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1176

"experiments": [ # The list of experiments to enable.

1177

"A String",

1178

],

1179

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1180

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

1181

# options are passed through the service and are used to recreate the

1182

# SDK pipeline options on the worker in a language agnostic and platform

1183

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1184

"a_key": "", # Properties of the object.

1185

},

1186

"userAgent": { # A description of the process that generated the request.

1187

"a_key": "", # Properties of the object.

1188

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1189

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

1190

# unspecified, the service will attempt to choose a reasonable

1191

# default. This should be in the form of the API service name,

1192

# e.g. "compute.googleapis.com".

1193

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

1194

# specified in order for the job to have workers.

1195

{ # Describes one particular pool of Cloud Dataflow workers to be

1196

# instantiated by the Cloud Dataflow service in order to perform the

1197

# computations required by a job. Note that a workflow job may use

1198

# multiple pools, in order to match the various computational

1199

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1200

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1201

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1202

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1203

# are supported.

1204

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1205

# service will attempt to choose a reasonable default.

1206

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

1207

# the service will use the network "default".

1208

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

1209

# will attempt to choose a reasonable default.

1210

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

1211

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1212

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1213

"a_key": "A String",

1214

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1215

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

1216

# Compute Engine API.

1217

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

1218

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

1219

# `TEARDOWN_NEVER`.

1220

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

1221

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

1222

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

1223

# down.

1224

#

1225

# If the workers are not torn down by the service, they will

1226

# continue to run and use Google Compute Engine VM resources in the

1227

# user's project until they are explicitly terminated by the user.

1228

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

1229

# policy except for small, manually supervised test jobs.

1230

#

1231

# If unknown or unspecified, the service will attempt to choose a reasonable

1232

# default.

1233

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

1234

# service will choose a number of threads (according to the number of cores

1235

# on the selected machine type for batch, or 1 by convention for streaming).

1236

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1237

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1238

"poolArgs": { # Extra arguments for this worker pool.

1239

"a_key": "", # Properties of the object. Contains field @type with type URL.

1240

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1241

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

1242

# execute the job. If zero or unspecified, the service will

1243

# attempt to choose a reasonable default.

1244

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1245

# using the standard Dataflow task runner. Users should ignore

1246

# this field.

1247

"workflowFileName": "A String", # The file to store the workflow in.

1248

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1249

# will not be uploaded.

1250

#

1251

# The supported resource type is:

1252

#

1253

# Google Cloud Storage:

1254

# storage.googleapis.com/{bucket}/{object}

1255

# bucket.storage.googleapis.com/{object}

1256

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

1257

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1258

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1259

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1260

"vmId": "A String", # The ID string of the VM.

1261

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1262

# taskrunner; e.g. "wheel".

1263

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1264

# taskrunner; e.g. "root".

1265

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1266

# access the Cloud Dataflow API.

1267

"A String",

1268

],

1269

"languageHint": "A String", # The suggested backend language.

1270

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1271

# console.

1272

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1273

"logDir": "A String", # The directory on the VM to store logs.

1274

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1275

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1276

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1277

# "shuffle/v1beta1".

1278

"workerId": "A String", # The ID of the worker running this pipeline.

1279

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1280

#

1281

# When workers access Google Cloud APIs, they logically do so via

1282

# relative URLs. If this field is specified, it supplies the base

1283

# URL to use for resolving these relative URLs. The normative

1284

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1285

# Locators".

1286

#

1287

# If not specified, the default value is "http://www.googleapis.com/"

1288

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1289

# "dataflow/v1b3/projects".

1290

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1291

# storage.

1292

#

1293

# The supported resource type is:

1294

#

1295

# Google Cloud Storage:

1296

#

1297

# storage.googleapis.com/{bucket}/{object}

1298

# bucket.storage.googleapis.com/{object}

1299

},

1300

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

1301

"harnessCommand": "A String", # The command to launch the worker harness.

1302

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1303

# temporary storage.

1304

#

1305

# The supported resource type is:

1306

#

1307

# Google Cloud Storage:

1308

# storage.googleapis.com/{bucket}/{object}

1309

# bucket.storage.googleapis.com/{object}

1310

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1311

#

1312

# When workers access Google Cloud APIs, they logically do so via

1313

# relative URLs. If this field is specified, it supplies the base

1314

# URL to use for resolving these relative URLs. The normative

1315

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1316

# Locators".

1317

#

1318

# If not specified, the default value is "http://www.googleapis.com/"

1319

},

1320

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1321

# select a default set of packages which are useful to worker

1322

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1323

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1324

{ # The packages that must be installed in order for a worker to run the

1325

# steps of the Cloud Dataflow job that will be assigned to its worker

1326

# pool.

1327

#

1328

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1329

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1330

# might use this to install jars containing the user's code and all of the

1331

# various dependencies (libraries, data files, etc.) required in order

1332

# for that code to run.

1333

"location": "A String", # The resource to read the package from. The supported resource type is:

1334

#

1335

# Google Cloud Storage:

1336

#

1337

# storage.googleapis.com/{bucket}

1338

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1339

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1340

},

1341

],

1342

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1343

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1344

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1345

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1346

"dataDisks": [ # Data disks that are used by a VM in this workflow.

1347

{ # Describes the data disk used by a workflow job.

1348

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1349

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

1350

# attempt to choose a reasonable default.

1351

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

1352

# must be a disk type appropriate to the project and zone in which

1353

# the workers will run. If unknown or unspecified, the service

1354

# will attempt to choose a reasonable default.

1355

#

1356

# For example, the standard persistent disk type is a resource name

1357

# typically ending in "pd-standard". If SSD persistent disks are

1358

# available, the resource name typically ends with "pd-ssd". The

1359

# actual valid values are defined by the Google Compute Engine API,

1360

# not by the Cloud Dataflow API; consult the Google Compute Engine

1361

# documentation for more information about determining the set of

1362

# available disk types for a particular project and zone.

1363

#

1364

# Google Compute Engine Disk types are local to a particular

1365

# project in a particular zone, and so the resource name will

1366

# typically look something like this:

1367

#

1368

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1369

},

1370

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1371

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

1372

# attempt to choose a reasonable default.

1373

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1374

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1375

},

1376

],

1377

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1378

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1379

# A description of the user pipeline and stages through which it is executed.

1380

# Created by Cloud Dataflow service. Only retrieved with

1381

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1382

# form. This data is provided by the Dataflow service for ease of visualizing

1383

# the pipeline and interpreting Dataflow provided metrics.

1384

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1385

{ # Description of the type, names/ids, and input/outputs for a transform.

1386

"kind": "A String", # Type of transform.

1387

"name": "A String", # User provided name for this transform instance.

1388

"inputCollectionName": [ # User names for all collection inputs to this transform.

1389

"A String",

1390

],

1391

"displayData": [ # Transform-specific display data.

1392

{ # Data provided with a pipeline or transform to provide descriptive info.

1393

"key": "A String", # The key identifying the display data.

1394

# This is intended to be used as a label for the display data

1395

# when viewed in a dax monitoring system.

1396

"shortStrValue": "A String", # A possible additional shorter value to display.

1397

# For example a java_class_name_value of com.mypackage.MyDoFn

1398

# will be stored with MyDoFn as the short_str_value and

1399

# com.mypackage.MyDoFn as the java_class_name value.

1400

# short_str_value can be displayed and java_class_name_value

1401

# will be displayed as a tooltip.

1402

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1403

"url": "A String", # An optional full URL.

1404

"floatValue": 3.14, # Contains value if the data is of float type.

1405

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1406

# language namespace (e.g. python module) which defines the display data.

1407

# This allows a dax monitoring system to specially handle the data

1408

# and perform custom rendering.

1409

"javaClassValue": "A String", # Contains value if the data is of java class type.

1410

"label": "A String", # An optional label to display in a dax UI for the element.

1411

"boolValue": True or False, # Contains value if the data is of a boolean type.

1412

"strValue": "A String", # Contains value if the data is of string type.

1413

"durationValue": "A String", # Contains value if the data is of duration type.

1414

"int64Value": "A String", # Contains value if the data is of int64 type.

1415

},

1416

],

1417

"outputCollectionName": [ # User names for all collection outputs to this transform.

1418

"A String",

1419

],

1420

"id": "A String", # SDK generated id of this transform instance.

1421

},

1422

],

1423

"displayData": [ # Pipeline level display data.

1424

{ # Data provided with a pipeline or transform to provide descriptive info.

1425

"key": "A String", # The key identifying the display data.

1426

# This is intended to be used as a label for the display data

1427

# when viewed in a dax monitoring system.

1428

"shortStrValue": "A String", # A possible additional shorter value to display.

1429

# For example a java_class_name_value of com.mypackage.MyDoFn

1430

# will be stored with MyDoFn as the short_str_value and

1431

# com.mypackage.MyDoFn as the java_class_name value.

1432

# short_str_value can be displayed and java_class_name_value

1433

# will be displayed as a tooltip.

1434

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1435

"url": "A String", # An optional full URL.

1436

"floatValue": 3.14, # Contains value if the data is of float type.

1437

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1438

# language namespace (e.g. python module) which defines the display data.

1439

# This allows a dax monitoring system to specially handle the data

1440

# and perform custom rendering.

1441

"javaClassValue": "A String", # Contains value if the data is of java class type.

1442

"label": "A String", # An optional label to display in a dax UI for the element.

1443

"boolValue": True or False, # Contains value if the data is of a boolean type.

1444

"strValue": "A String", # Contains value if the data is of string type.

1445

"durationValue": "A String", # Contains value if the data is of duration type.

1446

"int64Value": "A String", # Contains value if the data is of int64 type.

1447

},

1448

],

1449

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

1450

{ # Description of the composing transforms, names/ids, and input/outputs of a

1451

# stage of execution. Some composing transforms and sources may have been

1452

# generated by the Dataflow service during execution planning.

1453

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

1454

{ # Description of an interstitial value between transforms in an execution

1455

# stage.

1456

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1457

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1458

# source is most closely associated.

1459

"name": "A String", # Dataflow service generated name for this source.

1460

},

1461

],

1462

"kind": "A String", # Type of transform this stage is executing.

1463

"name": "A String", # Dataflow service generated name for this stage.

1464

"outputSource": [ # Output sources for this stage.

1465

{ # Description of an input or output of an execution stage.

1466

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1467

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1468

# source is most closely associated.

1469

"name": "A String", # Dataflow service generated name for this source.

1470

"sizeBytes": "A String", # Size of the source, if measurable.

1471

},

1472

],

1473

"inputSource": [ # Input sources for this stage.

1474

{ # Description of an input or output of an execution stage.

1475

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1476

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1477

# source is most closely associated.

1478

"name": "A String", # Dataflow service generated name for this source.

1479

"sizeBytes": "A String", # Size of the source, if measurable.

1480

},

1481

],

1482

"componentTransform": [ # Transforms that comprise this execution stage.

1483

{ # Description of a transform executed as part of an execution stage.

1484

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1485

"originalTransform": "A String", # User name for the original user transform with which this transform is

1486

# most closely associated.

1487

"name": "A String", # Dataflow service generated name for this source.

1488

},

1489

],

1490

"id": "A String", # Dataflow service generated id for this stage.

1491

},

1492

],

1493

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1494

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1495

{ # Defines a particular step within a Cloud Dataflow job.

1496

#

1497

# A job consists of multiple steps, each of which performs some

1498

# specific operation as part of the overall job. Data is typically

1499

# passed from one step to another as part of the job.

1500

#

1501

# Here's an example of a sequence of steps which together implement a

1502

# Map-Reduce job:

1503

#

1504

# * Read a collection of data from some source, parsing the

1505

# collection's elements.

1506

#

1507

# * Validate the elements.

1508

#

1509

# * Apply a user-defined function to map each element to some value

1510

# and extract an element-specific key value.

1511

#

1512

# * Group elements with the same key into a single element with

1513

# that key, transforming a multiply-keyed collection into a

1514

# uniquely-keyed collection.

1515

#

1516

# * Write the elements out to some data sink.

1517

#

1518

# Note that the Cloud Dataflow service may be used to run many different

1519

# types of jobs, not just Map-Reduce.

1520

"kind": "A String", # The kind of step in the Cloud Dataflow job.

1521

"properties": { # Named properties associated with the step. Each kind of

1522

# predefined step has its own required set of properties.

1523

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1524

"a_key": "", # Properties of the object.

1525

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1526

"name": "A String", # The name that identifies the step. This must be unique for each

1527

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1528

},

1529

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1530

"currentState": "A String", # The current state of the job.

1531

#

1532

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1533

# specified.

1534

#

1535

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1536

# terminal state. After a job has reached a terminal state, no

1537

# further state updates may be made.

1538

#

1539

# This field may be mutated by the Cloud Dataflow service;

1540

# callers cannot mutate it.

1541

"tempFiles": [ # A set of files the system should be aware of that are used

1542

# for temporary storage. These temporary files will be

1543

# removed on job completion.

1544

# No duplicates are allowed.

1545

# No file patterns are supported.

1546

#

1547

# The supported files are:

1548

#

1549

# Google Cloud Storage:

1550

#

1551

# storage.googleapis.com/{bucket}/{object}

1552

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1553

"A String",

1554

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1555

"type": "A String", # The type of Cloud Dataflow job.

1556

"id": "A String", # The unique ID of this job.

1557

#

1558

# This field is set by the Cloud Dataflow service when the Job is

1559

# created, and is immutable for the life of the job.

1560

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1561

# of the job it replaced.

1562

#

1563

# When sending a `CreateJobRequest`, you can update a job by specifying it

1564

# here. The job named here is stopped, and its intermediate state is

1565

# transferred to this job.

1566

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1567

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1568

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1569

"a_key": { # Contains information about how a particular

1570

# google.dataflow.v1beta3.Step will be executed.

1571

"stepName": [ # The steps associated with the execution stage.

1572

# Note that stages may have several steps, and that a given step

1573

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

}</pre>

</div>

<code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>

1584

<pre>Request the job status.

1585

1586

Args:

1587

projectId: string, A project id. (required)

1588

location: string, The location which contains the job specified by job_id. (required)

1589

jobId: string, The job to get metrics for. (required)

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1590

startTime: string, Return only metric data that has changed since this time.

1591

Default is to return all information about all metrics for the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1592

x__xgafv: string, V1 error format.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1593

Allowed values

1594

1 - v1 error format

1595

2 - v2 error format

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1596

1597

Returns:

1598

An object of the form:

1599

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1600

{ # JobMetrics contains a collection of metrics describing the detailed progress

1601

# of a Dataflow job. Metrics correspond to user-defined and system-defined

1602

# metrics in the job.

1603

#

1604

# This resource captures only the most recent values of each metric;

1605

# time-series data can be queried for them (under the same metric names)

1606

# from Cloud Monitoring.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1607

"metrics": [ # All metrics for this job.

1608

{ # Describes the state of a metric.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1609

"meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.

1610

# This holds the count of the aggregated values and is used in combination

1611

# with mean_sum above to obtain the actual mean aggregate value.

1612

# The only possible value type is Long.

1613

"kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are

1614

# "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".

1615

# The specified aggregation kind is case-insensitive.

1616

#

1617

# If omitted, this is not an aggregated value but instead

1618

# a single metric sample value.

1619

"set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only

1620

# possible value type is a list of Values whose type can be Long, Double,

1621

# or String, according to the metric's type. All Values in the list must

1622

# be of the same type.

1623

"name": { # Identifies a metric, by describing the source which generated the # Name of the metric.

1624

# metric.

1625

"origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;

1626

# will be "dataflow" for metrics defined by the Dataflow service or SDK.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1627

"name": "A String", # Worker-defined metric name.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1628

"context": { # Zero or more labeled fields which identify the part of the job this

1629

# metric is associated with, such as the name of a step or collection.

1630

#

1631

# For example, built-in counters associated with steps will have

1632

# context['step'] = <step-name>. Counters associated with PCollections

1633

# in the SDK will have context['pcollection'] = <pcollection-name>.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1634

"a_key": "A String",

1635

},

1636

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1637

"meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.

1638

# This holds the sum of the aggregated values and is used in combination

1639

# with mean_count below to obtain the actual mean aggregate value.

1640

# The only possible value types are Long and Double.

1641

"cumulative": True or False, # True if this metric is reported as the total cumulative aggregate

1642

# value accumulated since the worker started working on this WorkItem.

1643

# By default this is false, indicating that this metric is reported

1644

# as a delta that is not associated with any WorkItem.

1645

"updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are

1646

# reporting work progress; it will be filled in responses from the

1647

# metrics API.

1648

"scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",

1649

# "And", and "Or". The possible value types are Long, Double, and Boolean.

1650

"internal": "", # Worker-computed aggregate value for internal use by the Dataflow

1651

# service.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1652

},

1653

],

1654

"metricTime": "A String", # Timestamp as of which metric values are current.

}</pre>

</div>

<code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1660

<pre>List the jobs of a project.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1661

1662

Args:

1663

projectId: string, The project which owns the jobs. (required)

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1664

location: string, The location that contains this job. (required)

1665

pageSize: integer, If there are many jobs, limit response to at most this many.

1666

The actual number of jobs returned will be the lesser of max_responses

1667

and an unspecified server-defined limit.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1668

x__xgafv: string, V1 error format.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

Allowed values

1 - v1 error format

2 - v2 error format

pageToken: string, Set this to the 'next_page_token' field of a previous response

1673

to request additional results in a long list.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1674

filter: string, The kind of filter to use.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1675

view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1676

1677

Returns:

1678

An object of the form:

1679

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1680

{ # Response to a request to list Cloud Dataflow jobs. This may be a partial

1681

# response, depending on the page size in the ListJobsRequest.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1682

"nextPageToken": "A String", # Set if there may be more results than fit in this response.

1683

"failedLocation": [ # Zero or more messages describing locations that failed to respond.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1684

{ # Indicates which location failed to respond to a request for data.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1685

"name": "A String", # The name of the failed location.

1686

},

1687

],

1688

"jobs": [ # A subset of the requested job information.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1689

{ # Defines a job to be run by the Cloud Dataflow service.

1690

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

1691

# If this field is set, the service will ensure its uniqueness.

1692

# The request to create a job will fail if the service has knowledge of a

1693

# previously submitted job with the same client's ID and job name.

1694

# The caller may use this field to ensure idempotence of job

1695

# creation across retried attempts to create a job.

1696

# By default, the field is empty and, in that case, the service ignores it.

1697

"requestedState": "A String", # The job's requested state.

1698

#

1699

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1700

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1701

# also be used to directly set a job's requested state to

1702

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1703

# job if it has not already reached a terminal state.

1704

"name": "A String", # The user-specified Cloud Dataflow job name.

1705

#

1706

# Only one Job with a given name may exist in a project at any

1707

# given time. If a caller attempts to create a Job with the same

1708

# name as an already-existing Job, the attempt returns the

1709

# existing Job.

1710

#

1711

# The name must match the regular expression

1712

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

1713

"currentStateTime": "A String", # The timestamp associated with the current state.

1714

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1715

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1716

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1717

"labels": { # User-defined labels for this job.

1718

#

1719

# The labels map can contain no more than 64 entries. Entries of the labels

1720

# map are UTF8 strings that comply with the following restrictions:

1721

#

1722

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

1723

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

1724

# * Both keys and values are additionally constrained to be <= 128 bytes in

1725

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1726

"a_key": "A String",

1727

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1728

"location": "A String", # The location that contains this job.

1729

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1730

# Cloud Dataflow service.

1731

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

1732

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1733

"a_key": "A String",

1734

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1735

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

1736

"version": { # A structure describing which components and their versions of the service

1737

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1738

"a_key": "", # Properties of the object.

1739

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1740

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1741

# storage. The system will append the suffix "/temp-{JOBNAME}" to

1742

# this resource prefix, where {JOBNAME} is the value of the

1743

# job_name field. The resulting bucket and object prefix is used

1744

# as the prefix of the resources used to store temporary data

1745

# needed during the job execution. NOTE: This will override the

1746

# value in taskrunner_settings.

1747

# The supported resource type is:

1748

#

1749

# Google Cloud Storage:

1750

#

1751

# storage.googleapis.com/{bucket}/{object}

1752

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1753

"internalExperiments": { # Experimental settings.

1754

"a_key": "", # Properties of the object. Contains field @type with type URL.

1755

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1756

"dataset": "A String", # The dataset for the current project where various workflow

1757

# related tables are stored.

1758

#

1759

# The supported resource type is:

1760

#

1761

# Google BigQuery:

1762

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1763

"experiments": [ # The list of experiments to enable.

1764

"A String",

1765

],

1766

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1767

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

1768

# options are passed through the service and are used to recreate the

1769

# SDK pipeline options on the worker in a language agnostic and platform

1770

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1771

"a_key": "", # Properties of the object.

1772

},

1773

"userAgent": { # A description of the process that generated the request.

1774

"a_key": "", # Properties of the object.

1775

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1776

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

1777

# unspecified, the service will attempt to choose a reasonable

1778

# default. This should be in the form of the API service name,

1779

# e.g. "compute.googleapis.com".

1780

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

1781

# specified in order for the job to have workers.

1782

{ # Describes one particular pool of Cloud Dataflow workers to be

1783

# instantiated by the Cloud Dataflow service in order to perform the

1784

# computations required by a job. Note that a workflow job may use

1785

# multiple pools, in order to match the various computational

1786

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1787

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1788

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1789

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1790

# are supported.

1791

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1792

# service will attempt to choose a reasonable default.

1793

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

1794

# the service will use the network "default".

1795

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

1796

# will attempt to choose a reasonable default.

1797

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

1798

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1799

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1800

"a_key": "A String",

1801

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1802

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

1803

# Compute Engine API.

1804

"teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.

1805

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

1806

# `TEARDOWN_NEVER`.

1807

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

1808

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

1809

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

1810

# down.

1811

#

1812

# If the workers are not torn down by the service, they will

1813

# continue to run and use Google Compute Engine VM resources in the

1814

# user's project until they are explicitly terminated by the user.

1815

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

1816

# policy except for small, manually supervised test jobs.

1817

#

1818

# If unknown or unspecified, the service will attempt to choose a reasonable

1819

# default.

1820

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

1821

# service will choose a number of threads (according to the number of cores

1822

# on the selected machine type for batch, or 1 by convention for streaming).

1823

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1824

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1825

"poolArgs": { # Extra arguments for this worker pool.

1826

"a_key": "", # Properties of the object. Contains field @type with type URL.

1827

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1828

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

1829

# execute the job. If zero or unspecified, the service will

1830

# attempt to choose a reasonable default.

1831

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1832

# using the standard Dataflow task runner. Users should ignore

1833

# this field.

1834

"workflowFileName": "A String", # The file to store the workflow in.

1835

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1836

# will not be uploaded.

1837

#

1838

# The supported resource type is:

1839

#

1840

# Google Cloud Storage:

1841

# storage.googleapis.com/{bucket}/{object}

1842

# bucket.storage.googleapis.com/{object}

1843

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

1844

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1845

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1846

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1847

"vmId": "A String", # The ID string of the VM.

1848

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1849

# taskrunner; e.g. "wheel".

1850

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1851

# taskrunner; e.g. "root".

1852

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1853

# access the Cloud Dataflow API.

1854

"A String",

1855

],

1856

"languageHint": "A String", # The suggested backend language.

1857

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1858

# console.

1859

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1860

"logDir": "A String", # The directory on the VM to store logs.

1861

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1862

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1863

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1864

# "shuffle/v1beta1".

1865

"workerId": "A String", # The ID of the worker running this pipeline.

1866

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1867

#

1868

# When workers access Google Cloud APIs, they logically do so via

1869

# relative URLs. If this field is specified, it supplies the base

1870

# URL to use for resolving these relative URLs. The normative

1871

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1872

# Locators".

1873

#

1874

# If not specified, the default value is "http://www.googleapis.com/"

1875

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1876

# "dataflow/v1b3/projects".

1877

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1878

# storage.

1879

#

1880

# The supported resource type is:

1881

#

1882

# Google Cloud Storage:

1883

#

1884

# storage.googleapis.com/{bucket}/{object}

1885

# bucket.storage.googleapis.com/{object}

1886

},

1887

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

1888

"harnessCommand": "A String", # The command to launch the worker harness.

1889

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1890

# temporary storage.

1891

#

1892

# The supported resource type is:

1893

#

1894

# Google Cloud Storage:

1895

# storage.googleapis.com/{bucket}/{object}

1896

# bucket.storage.googleapis.com/{object}

1897

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1898

#

1899

# When workers access Google Cloud APIs, they logically do so via

1900

# relative URLs. If this field is specified, it supplies the base

1901

# URL to use for resolving these relative URLs. The normative

1902

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1903

# Locators".

1904

#

1905

# If not specified, the default value is "http://www.googleapis.com/"

1906

},

1907

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1908

# select a default set of packages which are useful to worker

1909

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1910

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1911

{ # The packages that must be installed in order for a worker to run the

1912

# steps of the Cloud Dataflow job that will be assigned to its worker

1913

# pool.

1914

#

1915

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1916

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1917

# might use this to install jars containing the user's code and all of the

1918

# various dependencies (libraries, data files, etc.) required in order

1919

# for that code to run.

1920

"location": "A String", # The resource to read the package from. The supported resource type is:

1921

#

1922

# Google Cloud Storage:

1923

#

1924

# storage.googleapis.com/{bucket}

1925

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1926

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1927

},

1928

],

1929

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1930

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1931

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1932

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1933

"dataDisks": [ # Data disks that are used by a VM in this workflow.

1934

{ # Describes the data disk used by a workflow job.

1935

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1936

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

1937

# attempt to choose a reasonable default.

1938

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

1939

# must be a disk type appropriate to the project and zone in which

1940

# the workers will run. If unknown or unspecified, the service

1941

# will attempt to choose a reasonable default.

1942

#

1943

# For example, the standard persistent disk type is a resource name

1944

# typically ending in "pd-standard". If SSD persistent disks are

1945

# available, the resource name typically ends with "pd-ssd". The

1946

# actual valid values are defined by the Google Compute Engine API,

1947

# not by the Cloud Dataflow API; consult the Google Compute Engine

1948

# documentation for more information about determining the set of

1949

# available disk types for a particular project and zone.

1950

#

1951

# Google Compute Engine Disk types are local to a particular

1952

# project in a particular zone, and so the resource name will

1953

# typically look something like this:

1954

#

1955

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1956

},

1957

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1958

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

1959

# attempt to choose a reasonable default.

1960

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1961

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

1962

},

1963

],

1964

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

1965

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1966

# A description of the user pipeline and stages through which it is executed.

1967

# Created by Cloud Dataflow service. Only retrieved with

1968

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1969

# form. This data is provided by the Dataflow service for ease of visualizing

1970

# the pipeline and interpreting Dataflow provided metrics.

1971

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1972

{ # Description of the type, names/ids, and input/outputs for a transform.

1973

"kind": "A String", # Type of transform.

1974

"name": "A String", # User provided name for this transform instance.

1975

"inputCollectionName": [ # User names for all collection inputs to this transform.

1976

"A String",

1977

],

1978

"displayData": [ # Transform-specific display data.

1979

{ # Data provided with a pipeline or transform to provide descriptive info.

1980

"key": "A String", # The key identifying the display data.

1981

# This is intended to be used as a label for the display data

1982

# when viewed in a dax monitoring system.

1983

"shortStrValue": "A String", # A possible additional shorter value to display.

1984

# For example a java_class_name_value of com.mypackage.MyDoFn

1985

# will be stored with MyDoFn as the short_str_value and

1986

# com.mypackage.MyDoFn as the java_class_name value.

1987

# short_str_value can be displayed and java_class_name_value

1988

# will be displayed as a tooltip.

1989

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1990

"url": "A String", # An optional full URL.

1991

"floatValue": 3.14, # Contains value if the data is of float type.

1992

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1993

# language namespace (e.g. a Python module) which defines the display data.

1994

# This allows a dax monitoring system to specially handle the data

1995

# and perform custom rendering.

1996

"javaClassValue": "A String", # Contains value if the data is of java class type.

1997

"label": "A String", # An optional label to display in a dax UI for the element.

1998

"boolValue": True or False, # Contains value if the data is of a boolean type.

1999

"strValue": "A String", # Contains value if the data is of string type.

2000

"durationValue": "A String", # Contains value if the data is of duration type.

2001

"int64Value": "A String", # Contains value if the data is of int64 type.

2002

},

2003

],

2004

"outputCollectionName": [ # User names for all collection outputs to this transform.

2005

"A String",

2006

],

2007

"id": "A String", # SDK generated id of this transform instance.

2008

},

2009

],

2010

"displayData": [ # Pipeline level display data.

2011

{ # Data provided with a pipeline or transform to provide descriptive info.

2012

"key": "A String", # The key identifying the display data.

2013

# This is intended to be used as a label for the display data

2014

# when viewed in a dax monitoring system.

2015

"shortStrValue": "A String", # A possible additional shorter value to display.

2016

# For example a java_class_name_value of com.mypackage.MyDoFn

2017

# will be stored with MyDoFn as the short_str_value and

2018

# com.mypackage.MyDoFn as the java_class_name value.

2019

# short_str_value can be displayed and java_class_name_value

2020

# will be displayed as a tooltip.

2021

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2022

"url": "A String", # An optional full URL.

2023

"floatValue": 3.14, # Contains value if the data is of float type.

2024

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2025

# language namespace (e.g. a Python module) which defines the display data.

2026

# This allows a dax monitoring system to specially handle the data

2027

# and perform custom rendering.

2028

"javaClassValue": "A String", # Contains value if the data is of java class type.

2029

"label": "A String", # An optional label to display in a dax UI for the element.

2030

"boolValue": True or False, # Contains value if the data is of a boolean type.

2031

"strValue": "A String", # Contains value if the data is of string type.

2032

"durationValue": "A String", # Contains value if the data is of duration type.

2033

"int64Value": "A String", # Contains value if the data is of int64 type.

2034

},

2035

],

2036

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

2037

{ # Description of the composing transforms, names/ids, and input/outputs of a

2038

# stage of execution. Some composing transforms and sources may have been

2039

# generated by the Dataflow service during execution planning.

2040

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

2041

{ # Description of an interstitial value between transforms in an execution

2042

# stage.

2043

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2044

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2045

# source is most closely associated.

2046

"name": "A String", # Dataflow service generated name for this source.

2047

},

2048

],

2049

"kind": "A String", # Type of transform this stage is executing.

2050

"name": "A String", # Dataflow service generated name for this stage.

2051

"outputSource": [ # Output sources for this stage.

2052

{ # Description of an input or output of an execution stage.

2053

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2054

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2055

# source is most closely associated.

2056

"name": "A String", # Dataflow service generated name for this source.

2057

"sizeBytes": "A String", # Size of the source, if measurable.

2058

},

2059

],

2060

"inputSource": [ # Input sources for this stage.

2061

{ # Description of an input or output of an execution stage.

2062

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2063

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2064

# source is most closely associated.

2065

"name": "A String", # Dataflow service generated name for this source.

2066

"sizeBytes": "A String", # Size of the source, if measurable.

2067

},

2068

],

2069

"componentTransform": [ # Transforms that comprise this execution stage.

2070

{ # Description of a transform executed as part of an execution stage.

2071

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2072

"originalTransform": "A String", # User name for the original user transform with which this transform is

2073

# most closely associated.

2074

"name": "A String", # Dataflow service generated name for this source.

2075

},

2076

],

2077

"id": "A String", # Dataflow service generated id for this stage.

2078

},

2079

],

2080

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2081

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2082

{ # Defines a particular step within a Cloud Dataflow job.

2083

#

2084

# A job consists of multiple steps, each of which performs some

2085

# specific operation as part of the overall job. Data is typically

2086

# passed from one step to another as part of the job.

2087

#

2088

# Here's an example of a sequence of steps which together implement a

2089

# Map-Reduce job:

2090

#

2091

# * Read a collection of data from some source, parsing the

2092

# collection's elements.

2093

#

2094

# * Validate the elements.

2095

#

2096

# * Apply a user-defined function to map each element to some value

2097

# and extract an element-specific key value.

2098

#

2099

# * Group elements with the same key into a single element with

2100

# that key, transforming a multiply-keyed collection into a

2101

# uniquely-keyed collection.

2102

#

2103

# * Write the elements out to some data sink.

2104

#

2105

# Note that the Cloud Dataflow service may be used to run many different

2106

# types of jobs, not just Map-Reduce.

2107

"kind": "A String", # The kind of step in the Cloud Dataflow job.

2108

"properties": { # Named properties associated with the step. Each kind of

2109

# predefined step has its own required set of properties.

2110

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2111

"a_key": "", # Properties of the object.

2112

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2113

"name": "A String", # The name that identifies the step. This must be unique for each

2114

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2115

},

2116

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2117

"currentState": "A String", # The current state of the job.

2118

#

2119

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

2120

# specified.

2121

#

2122

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

2123

# terminal state. After a job has reached a terminal state, no

2124

# further state updates may be made.

2125

#

2126

# This field may be mutated by the Cloud Dataflow service;

2127

# callers cannot mutate it.

2128

"tempFiles": [ # A set of files the system should be aware of that are used

2129

# for temporary storage. These temporary files will be

2130

# removed on job completion.

2131

# No duplicates are allowed.

2132

# No file patterns are supported.

2133

#

2134

# The supported files are:

2135

#

2136

# Google Cloud Storage:

2137

#

2138

# storage.googleapis.com/{bucket}/{object}

2139

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2140

"A String",

2141

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2142

"type": "A String", # The type of Cloud Dataflow job.

2143

"id": "A String", # The unique ID of this job.

2144

#

2145

# This field is set by the Cloud Dataflow service when the Job is

2146

# created, and is immutable for the life of the job.

2147

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

2148

# of the job it replaced.

2149

#

2150

# When sending a `CreateJobRequest`, you can update a job by specifying it

2151

# here. The job named here is stopped, and its intermediate state is

2152

# transferred to this job.

2153

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

2154

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2155

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2156

"a_key": { # Contains information about how a particular

2157

# google.dataflow.v1beta3.Step will be executed.

2158

"stepName": [ # The steps associated with the execution stage.

2159

# Note that stages may have several steps, and that a given step

2160

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

},

],

}</pre>

</div>

<code class="details" id="list_next">list_next(previous_request, previous_response)</code>

2173

<pre>Retrieves the next page of results.

2174

2175

Args:

2176

previous_request: The request for the previous page. (required)

2177

previous_response: The response from the request for the previous page. (required)

2178

2179

Returns:

2180

A request object that you can call 'execute()' on to request the next

2181

page. Returns None if there are no more items in the collection.

</pre>

</div>

<code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2187

<pre>Updates the state of an existing Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2188

2189

Args:

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2190

projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)

2191

location: string, The location that contains this job. (required)

2192

jobId: string, The job ID. (required)

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2193

body: object, The request body. (required)

2194

The object takes the form of:

2195

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2196

{ # Defines a job to be run by the Cloud Dataflow service.

2197

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

2198

# If this field is set, the service will ensure its uniqueness.

2199

# The request to create a job will fail if the service has knowledge of a

2200

# previously submitted job with the same client's ID and job name.

2201

# The caller may use this field to ensure idempotence of job

2202

# creation across retried attempts to create a job.

2203

# By default, the field is empty and, in that case, the service ignores it.

2204

"requestedState": "A String", # The job's requested state.

2205

#

2206

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

2207

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

2208

# also be used to directly set a job's requested state to

2209

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

2210

# job if it has not already reached a terminal state.

2211

"name": "A String", # The user-specified Cloud Dataflow job name.

2212

#

2213

# Only one Job with a given name may exist in a project at any

2214

# given time. If a caller attempts to create a Job with the same

2215

# name as an already-existing Job, the attempt returns the

2216

# existing Job.

2217

#

2218

# The name must match the regular expression

2219

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

2220

"currentStateTime": "A String", # The timestamp associated with the current state.

2221

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

2222

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

2223

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

2224

"labels": { # User-defined labels for this job.

2225

#

2226

# The labels map can contain no more than 64 entries. Entries of the labels

2227

# map are UTF8 strings that comply with the following restrictions:

2228

#

2229

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

2230

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

2231

# * Both keys and values are additionally constrained to be <= 128 bytes in

2232

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2233

"a_key": "A String",

2234

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2235

"location": "A String", # The location that contains this job.

2236

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

2237

# Cloud Dataflow service.

2238

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

2239

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2240

"a_key": "A String",

2241

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2242

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

2243

"version": { # A structure describing which components and their versions of the service

2244

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2245

"a_key": "", # Properties of the object.

2246

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2247

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2248

# storage. The system will append the suffix "/temp-{JOBNAME}" to

2249

# this resource prefix, where {JOBNAME} is the value of the

2250

# job_name field. The resulting bucket and object prefix is used

2251

# as the prefix of the resources used to store temporary data

2252

# needed during the job execution. NOTE: This will override the

2253

# value in taskrunner_settings.

2254

# The supported resource type is:

2255

#

2256

# Google Cloud Storage:

2257

#

2258

# storage.googleapis.com/{bucket}/{object}

2259

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2260

"internalExperiments": { # Experimental settings.

2261

"a_key": "", # Properties of the object. Contains field @type with type URL.

2262

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2263

"dataset": "A String", # The dataset for the current project where various workflow

2264

# related tables are stored.

2265

#

2266

# The supported resource type is:

2267

#

2268

# Google BigQuery:

2269

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2270

"experiments": [ # The list of experiments to enable.

2271

"A String",

2272

],

2273

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2274

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

2275

# options are passed through the service and are used to recreate the

2276

# SDK pipeline options on the worker in a language agnostic and platform

2277

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2278

"a_key": "", # Properties of the object.

2279

},

2280

"userAgent": { # A description of the process that generated the request.

2281

"a_key": "", # Properties of the object.

2282

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2283

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

2284

# unspecified, the service will attempt to choose a reasonable

2285

# default. This should be in the form of the API service name,

2286

# e.g. "compute.googleapis.com".

2287

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

2288

# specified in order for the job to have workers.

2289

{ # Describes one particular pool of Cloud Dataflow workers to be

2290

# instantiated by the Cloud Dataflow service in order to perform the

2291

# computations required by a job. Note that a workflow job may use

2292

# multiple pools, in order to match the various computational

2293

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2294

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2295

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2296

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

2297

# are supported.

2298

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

2299

# service will attempt to choose a reasonable default.

2300

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

2301

# the service will use the network "default".

2302

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

2303

# will attempt to choose a reasonable default.

2304

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

2305

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2306

"metadata": { # Metadata to set on the Google Compute Engine VMs.

2307

"a_key": "A String",

2308

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2309

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

2310

# Compute Engine API.

2311

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

2312

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

2313

# `TEARDOWN_NEVER`.

2314

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

2315

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

2316

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

2317

# down.

2318

#

2319

# If the workers are not torn down by the service, they will

2320

# continue to run and use Google Compute Engine VM resources in the

2321

# user's project until they are explicitly terminated by the user.

2322

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

2323

# policy except for small, manually supervised test jobs.

2324

#

2325

# If unknown or unspecified, the service will attempt to choose a reasonable

2326

# default.

2327

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

2328

# service will choose a number of threads (according to the number of cores

2329

# on the selected machine type for batch, or 1 by convention for streaming).

2330

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

2331

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2332

"poolArgs": { # Extra arguments for this worker pool.

2333

"a_key": "", # Properties of the object. Contains field @type with type URL.

2334

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2335

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

2336

# execute the job. If zero or unspecified, the service will

2337

# attempt to choose a reasonable default.

2338

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

2339

# using the standard Dataflow task runner. Users should ignore

2340

# this field.

2341

"workflowFileName": "A String", # The file to store the workflow in.

2342

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

2343

# will not be uploaded.

2344

#

2345

# The supported resource type is:

2346

#

2347

# Google Cloud Storage:

2348

# storage.googleapis.com/{bucket}/{object}

2349

# bucket.storage.googleapis.com/{object}

2350

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

2351

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

2352

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

2353

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

2354

"vmId": "A String", # The ID string of the VM.

2355

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

2356

# taskrunner; e.g. "wheel".

2357

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

2358

# taskrunner; e.g. "root".

2359

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

2360

# access the Cloud Dataflow API.

2361

"A String",

2362

],

2363

"languageHint": "A String", # The suggested backend language.

2364

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

2365

# console.

2366

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

2367

"logDir": "A String", # The directory on the VM to store logs.

2368

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

2369

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

2370

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

2371

# "shuffle/v1beta1".

2372

"workerId": "A String", # The ID of the worker running this pipeline.

2373

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

2374

#

2375

# When workers access Google Cloud APIs, they logically do so via

2376

# relative URLs. If this field is specified, it supplies the base

2377

# URL to use for resolving these relative URLs. The normative

2378

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2379

# Locators".

2380

#

2381

# If not specified, the default value is "http://www.googleapis.com/"

2382

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

2383

# "dataflow/v1b3/projects".

2384

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2385

# storage.

2386

#

2387

# The supported resource type is:

2388

#

2389

# Google Cloud Storage:

2390

#

2391

# storage.googleapis.com/{bucket}/{object}

2392

# bucket.storage.googleapis.com/{object}

2393

},

2394

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

2395

"harnessCommand": "A String", # The command to launch the worker harness.

2396

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

2397

# temporary storage.

2398

#

2399

# The supported resource type is:

2400

#

2401

# Google Cloud Storage:

2402

# storage.googleapis.com/{bucket}/{object}

2403

# bucket.storage.googleapis.com/{object}

2404

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

2405

#

2406

# When workers access Google Cloud APIs, they logically do so via

2407

# relative URLs. If this field is specified, it supplies the base

2408

# URL to use for resolving these relative URLs. The normative

2409

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2410

# Locators".

2411

#

2412

# If not specified, the default value is "http://www.googleapis.com/"

2413

},

2414

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

2415

# select a default set of packages which are useful to worker

2416

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2417

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2418

{ # The packages that must be installed in order for a worker to run the

2419

# steps of the Cloud Dataflow job that will be assigned to its worker

2420

# pool.

2421

#

2422

# This is the mechanism by which the Cloud Dataflow SDK causes code to

2423

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

2424

# might use this to install jars containing the user's code and all of the

2425

# various dependencies (libraries, data files, etc.) required in order

2426

# for that code to run.

2427

"location": "A String", # The resource to read the package from. The supported resource type is:

2428

#

2429

# Google Cloud Storage:

2430

#

2431

# storage.googleapis.com/{bucket}

2432

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2433

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2434

},

2435

],

2436

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2437

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2438

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2439

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2440

"dataDisks": [ # Data disks that are used by a VM in this workflow.

2441

{ # Describes the data disk used by a workflow job.

2442

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2443

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

2444

# attempt to choose a reasonable default.

2445

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

2446

# must be a disk type appropriate to the project and zone in which

2447

# the workers will run. If unknown or unspecified, the service

2448

# will attempt to choose a reasonable default.

2449

#

2450

# For example, the standard persistent disk type is a resource name

2451

# typically ending in "pd-standard". If SSD persistent disks are

2452

# available, the resource name typically ends with "pd-ssd". The

2453

# actual valid values are defined by the Google Compute Engine API,

2454

# not by the Cloud Dataflow API; consult the Google Compute Engine

2455

# documentation for more information about determining the set of

2456

# available disk types for a particular project and zone.

2457

#

2458

# Google Compute Engine Disk types are local to a particular

2459

# project in a particular zone, and so the resource name will

2460

# typically look something like this:

2461

#

2462

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2463

},

2464

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2465

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

2466

# attempt to choose a reasonable default.

2467

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

2468

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2469

},

2470

],

2471

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2472

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

2473

# A description of the user pipeline and stages through which it is executed.

2474

# Created by Cloud Dataflow service. Only retrieved with

2475

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

2476

# form. This data is provided by the Dataflow service for ease of visualizing

2477

# the pipeline and interpreting Dataflow-provided metrics.

2478

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

2479

{ # Description of the type, names/ids, and input/outputs for a transform.

2480

"kind": "A String", # Type of transform.

2481

"name": "A String", # User provided name for this transform instance.

2482

"inputCollectionName": [ # User names for all collection inputs to this transform.

2483

"A String",

2484

],

2485

"displayData": [ # Transform-specific display data.

2486

{ # Data provided with a pipeline or transform to provide descriptive info.

2487

"key": "A String", # The key identifying the display data.

2488

# This is intended to be used as a label for the display data

2489

# when viewed in a dax monitoring system.

2490

"shortStrValue": "A String", # A possible additional shorter value to display.

2491

# For example a java_class_name_value of com.mypackage.MyDoFn

2492

# will be stored with MyDoFn as the short_str_value and

2493

# com.mypackage.MyDoFn as the java_class_name value.

2494

# short_str_value can be displayed and java_class_name_value

2495

# will be displayed as a tooltip.

2496

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2497

"url": "A String", # An optional full URL.

2498

"floatValue": 3.14, # Contains value if the data is of float type.

2499

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2500

# language namespace (i.e. python module) which defines the display data.

2501

# This allows a dax monitoring system to specially handle the data

2502

# and perform custom rendering.

2503

"javaClassValue": "A String", # Contains value if the data is of java class type.

2504

"label": "A String", # An optional label to display in a dax UI for the element.

2505

"boolValue": True or False, # Contains value if the data is of a boolean type.

2506

"strValue": "A String", # Contains value if the data is of string type.

2507

"durationValue": "A String", # Contains value if the data is of duration type.

2508

"int64Value": "A String", # Contains value if the data is of int64 type.

2509

},

2510

],

2511

"outputCollectionName": [ # User names for all collection outputs to this transform.

2512

"A String",

2513

],

2514

"id": "A String", # SDK generated id of this transform instance.

2515

},

2516

],

2517

"displayData": [ # Pipeline level display data.

2518

{ # Data provided with a pipeline or transform to provide descriptive info.

2519

"key": "A String", # The key identifying the display data.

2520

# This is intended to be used as a label for the display data

2521

# when viewed in a dax monitoring system.

2522

"shortStrValue": "A String", # A possible additional shorter value to display.

2523

# For example a java_class_name_value of com.mypackage.MyDoFn

2524

# will be stored with MyDoFn as the short_str_value and

2525

# com.mypackage.MyDoFn as the java_class_name value.

2526

# short_str_value can be displayed and java_class_name_value

2527

# will be displayed as a tooltip.

2528

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2529

"url": "A String", # An optional full URL.

2530

"floatValue": 3.14, # Contains value if the data is of float type.

2531

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2532

# language namespace (i.e. python module) which defines the display data.

2533

# This allows a dax monitoring system to specially handle the data

2534

# and perform custom rendering.

2535

"javaClassValue": "A String", # Contains value if the data is of java class type.

2536

"label": "A String", # An optional label to display in a dax UI for the element.

2537

"boolValue": True or False, # Contains value if the data is of a boolean type.

2538

"strValue": "A String", # Contains value if the data is of string type.

2539

"durationValue": "A String", # Contains value if the data is of duration type.

2540

"int64Value": "A String", # Contains value if the data is of int64 type.

2541

},

2542

],

2543

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

2544

{ # Description of the composing transforms, names/ids, and input/outputs of a

2545

# stage of execution. Some composing transforms and sources may have been

2546

# generated by the Dataflow service during execution planning.

2547

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

2548

{ # Description of an interstitial value between transforms in an execution

2549

# stage.

2550

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2551

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2552

# source is most closely associated.

2553

"name": "A String", # Dataflow service generated name for this source.

2554

},

2555

],

2556

"kind": "A String", # Type of transform this stage is executing.

2557

"name": "A String", # Dataflow service generated name for this stage.

2558

"outputSource": [ # Output sources for this stage.

2559

{ # Description of an input or output of an execution stage.

2560

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2561

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2562

# source is most closely associated.

2563

"name": "A String", # Dataflow service generated name for this source.

2564

"sizeBytes": "A String", # Size of the source, if measurable.

2565

},

2566

],

2567

"inputSource": [ # Input sources for this stage.

2568

{ # Description of an input or output of an execution stage.

2569

"userName": "A String", # Human-readable name for this source; may be user or system generated.

2570

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

2571

# source is most closely associated.

2572

"name": "A String", # Dataflow service generated name for this source.

2573

"sizeBytes": "A String", # Size of the source, if measurable.

2574

},

2575

],

2576

"componentTransform": [ # Transforms that comprise this execution stage.

2577

{ # Description of a transform executed as part of an execution stage.

2578

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

2579

"originalTransform": "A String", # User name for the original user transform with which this transform is

2580

# most closely associated.

2581

"name": "A String", # Dataflow service generated name for this source.

2582

},

2583

],

2584

"id": "A String", # Dataflow service generated id for this stage.

2585

},

2586

],

2587

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2588

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2589

{ # Defines a particular step within a Cloud Dataflow job.

2590

#

2591

# A job consists of multiple steps, each of which performs some

2592

# specific operation as part of the overall job. Data is typically

2593

# passed from one step to another as part of the job.

2594

#

2595

# Here's an example of a sequence of steps which together implement a

2596

# Map-Reduce job:

2597

#

2598

# * Read a collection of data from some source, parsing the

2599

# collection's elements.

2600

#

2601

# * Validate the elements.

2602

#

2603

# * Apply a user-defined function to map each element to some value

2604

# and extract an element-specific key value.

2605

#

2606

# * Group elements with the same key into a single element with

2607

# that key, transforming a multiply-keyed collection into a

2608

# uniquely-keyed collection.

2609

#

2610

# * Write the elements out to some data sink.

2611

#

2612

# Note that the Cloud Dataflow service may be used to run many different

2613

# types of jobs, not just Map-Reduce.

2614

"kind": "A String", # The kind of step in the Cloud Dataflow job.

2615

"properties": { # Named properties associated with the step. Each kind of

2616

# predefined step has its own required set of properties.

2617

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2618

"a_key": "", # Properties of the object.

2619

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2620

"name": "A String", # The name that identifies the step. This must be unique for each

2621

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2622

},

2623

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2624

"currentState": "A String", # The current state of the job.

2625

#

2626

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

2627

# specified.

2628

#

2629

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

2630

# terminal state. After a job has reached a terminal state, no

2631

# further state updates may be made.

2632

#

2633

# This field may be mutated by the Cloud Dataflow service;

2634

# callers cannot mutate it.

2635

"tempFiles": [ # A set of files the system should be aware of that are used

2636

# for temporary storage. These temporary files will be

2637

# removed on job completion.

2638

# No duplicates are allowed.

2639

# No file patterns are supported.

2640

#

2641

# The supported files are:

2642

#

2643

# Google Cloud Storage:

2644

#

2645

# storage.googleapis.com/{bucket}/{object}

2646

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2647

"A String",

2648

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2649

"type": "A String", # The type of Cloud Dataflow job.

2650

"id": "A String", # The unique ID of this job.

2651

#

2652

# This field is set by the Cloud Dataflow service when the Job is

2653

# created, and is immutable for the life of the job.

2654

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

2655

# of the job it replaced.

2656

#

2657

# When sending a `CreateJobRequest`, you can update a job by specifying it

2658

# here. The job named here is stopped, and its intermediate state is

2659

# transferred to this job.

2660

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

2661

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2662

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2663

"a_key": { # Contains information about how a particular

2664

# google.dataflow.v1beta3.Step will be executed.

2665

"stepName": [ # The steps associated with the execution stage.

2666

# Note that stages may have several steps, and that a given step

2667

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

}

x__xgafv: string, V1 error format.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2676

Allowed values

2677

1 - v1 error format

2678

2 - v2 error format

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2679

2680

Returns:

2681

An object of the form:

2682

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2683

{ # Defines a job to be run by the Cloud Dataflow service.

2684

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

2685

# If this field is set, the service will ensure its uniqueness.

2686

# The request to create a job will fail if the service has knowledge of a

2687

# previously submitted job with the same client's ID and job name.

2688

# The caller may use this field to ensure idempotence of job

2689

# creation across retried attempts to create a job.

2690

# By default, the field is empty and, in that case, the service ignores it.

2691

"requestedState": "A String", # The job's requested state.

2692

#

2693

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

2694

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

2695

# also be used to directly set a job's requested state to

2696

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

2697

# job if it has not already reached a terminal state.

2698

"name": "A String", # The user-specified Cloud Dataflow job name.

2699

#

2700

# Only one Job with a given name may exist in a project at any

2701

# given time. If a caller attempts to create a Job with the same

2702

# name as an already-existing Job, the attempt returns the

2703

# existing Job.

2704

#

2705

# The name must match the regular expression

2706

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

2707

"currentStateTime": "A String", # The timestamp associated with the current state.

2708

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

2709

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

2710

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

2711

"labels": { # User-defined labels for this job.

2712

#

2713

# The labels map can contain no more than 64 entries. Entries of the labels

2714

# map are UTF8 strings that comply with the following restrictions:

2715

#

2716

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

2717

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

2718

# * Both keys and values are additionally constrained to be <= 128 bytes in

2719

# size.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2720

"a_key": "A String",

2721

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2722

"location": "A String", # The location that contains this job.

2723

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

2724

# Cloud Dataflow service.

2725

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

2726

# corresponding name prefixes of the new job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2727

"a_key": "A String",

2728

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2729

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

2730

"version": { # A structure describing which components and their versions of the service

2731

# are required in order to run the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2732

"a_key": "", # Properties of the object.

2733

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2734

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2735

# storage. The system will append the suffix "/temp-{JOBNAME}" to

2736

# this resource prefix, where {JOBNAME} is the value of the

2737

# job_name field. The resulting bucket and object prefix is used

2738

# as the prefix of the resources used to store temporary data

2739

# needed during the job execution. NOTE: This will override the

2740

# value in taskrunner_settings.

2741

# The supported resource type is:

2742

#

2743

# Google Cloud Storage:

2744

#

2745

# storage.googleapis.com/{bucket}/{object}

2746

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2747

"internalExperiments": { # Experimental settings.

2748

"a_key": "", # Properties of the object. Contains field @type with type URL.

2749

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2750

"dataset": "A String", # The dataset for the current project where various workflow

2751

# related tables are stored.

2752

#

2753

# The supported resource type is:

2754

#

2755

# Google BigQuery:

2756

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2757

"experiments": [ # The list of experiments to enable.

2758

"A String",

2759

],

2760

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2761

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

2762

# options are passed through the service and are used to recreate the

2763

# SDK pipeline options on the worker in a language agnostic and platform

2764

# independent way.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2765

"a_key": "", # Properties of the object.

2766

},

2767

"userAgent": { # A description of the process that generated the request.

2768

"a_key": "", # Properties of the object.

2769

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2770

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

2771

# unspecified, the service will attempt to choose a reasonable

2772

# default. This should be in the form of the API service name,

2773

# e.g. "compute.googleapis.com".

2774

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

2775

# specified in order for the job to have workers.

2776

{ # Describes one particular pool of Cloud Dataflow workers to be

2777

# instantiated by the Cloud Dataflow service in order to perform the

2778

# computations required by a job. Note that a workflow job may use

2779

# multiple pools, in order to match the various computational

2780

# requirements of the various stages of the job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2781

"diskSourceImage": "A String", # Fully qualified source image for disks.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2782

"ipConfiguration": "A String", # Configuration for VM IPs.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2783

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

2784

# are supported.

2785

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

2786

# service will attempt to choose a reasonable default.

2787

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

2788

# the service will use the network "default".

2789

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

2790

# will attempt to choose a reasonable default.

2791

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

2792

# attempt to choose a reasonable default.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2793

"metadata": { # Metadata to set on the Google Compute Engine VMs.

2794

"a_key": "A String",

2795

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2796

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

2797

# Compute Engine API.

2798

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

2799

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

2800

# `TEARDOWN_NEVER`.

2801

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

2802

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

2803

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

2804

# down.

2805

#

2806

# If the workers are not torn down by the service, they will

2807

# continue to run and use Google Compute Engine VM resources in the

2808

# user's project until they are explicitly terminated by the user.

2809

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

2810

# policy except for small, manually supervised test jobs.

2811

#

2812

# If unknown or unspecified, the service will attempt to choose a reasonable

2813

# default.

2814

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

2815

# service will choose a number of threads (according to the number of cores

2816

# on the selected machine type for batch, or 1 by convention for streaming).

2817

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

2818

# the form "regions/REGION/subnetworks/SUBNETWORK".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2819

"poolArgs": { # Extra arguments for this worker pool.

2820

"a_key": "", # Properties of the object. Contains field @type with type URL.

2821

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2822

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

2823

# execute the job. If zero or unspecified, the service will

2824

# attempt to choose a reasonable default.

2825

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

2826

# using the standard Dataflow task runner. Users should ignore

2827

# this field.

2828

"workflowFileName": "A String", # The file to store the workflow in.

2829

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

2830

# will not be uploaded.

2831

#

2832

# The supported resource type is:

2833

#

2834

# Google Cloud Storage:

2835

# storage.googleapis.com/{bucket}/{object}

2836

# bucket.storage.googleapis.com/{object}

2837

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

2838

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

2839

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

2840

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

2841

"vmId": "A String", # The ID string of the VM.

2842

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

2843

# taskrunner; e.g. "wheel".

2844

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

2845

# taskrunner; e.g. "root".

2846

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

2847

# access the Cloud Dataflow API.

2848

"A String",

2849

],

2850

"languageHint": "A String", # The suggested backend language.

2851

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

2852

# console.

2853

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

2854

"logDir": "A String", # The directory on the VM to store logs.

2855

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

2856

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

2857

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

2858

# "shuffle/v1beta1".

2859

"workerId": "A String", # The ID of the worker running this pipeline.

2860

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

2861

#

2862

# When workers access Google Cloud APIs, they logically do so via

2863

# relative URLs. If this field is specified, it supplies the base

2864

# URL to use for resolving these relative URLs. The normative

2865

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2866

# Locators".

2867

#

2868

# If not specified, the default value is "http://www.googleapis.com/"

2869

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

2870

# "dataflow/v1b3/projects".

2871

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

2872

# storage.

2873

#

2874

# The supported resource type is:

2875

#

2876

# Google Cloud Storage:

2877

#

2878

# storage.googleapis.com/{bucket}/{object}

2879

# bucket.storage.googleapis.com/{object}

2880

},

2881

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

2882

"harnessCommand": "A String", # The command to launch the worker harness.

2883

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

2884

# temporary storage.

2885

#

2886

# The supported resource type is:

2887

#

2888

# Google Cloud Storage:

2889

# storage.googleapis.com/{bucket}/{object}

2890

# bucket.storage.googleapis.com/{object}

2891

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

2892

#

2893

# When workers access Google Cloud APIs, they logically do so via

2894

# relative URLs. If this field is specified, it supplies the base

2895

# URL to use for resolving these relative URLs. The normative

2896

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

2897

# Locators".

2898

#

2899

# If not specified, the default value is "http://www.googleapis.com/"

2900

},

2901

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

2902

# select a default set of packages which are useful to worker

2903

# harnesses written in a particular language.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2904

"packages": [ # Packages to be installed on workers.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2905

{ # The packages that must be installed in order for a worker to run the

2906

# steps of the Cloud Dataflow job that will be assigned to its worker

2907

# pool.

2908

#

2909

# This is the mechanism by which the Cloud Dataflow SDK causes code to

2910

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

2911

# might use this to install jars containing the user's code and all of the

2912

# various dependencies (libraries, data files, etc.) required in order

2913

# for that code to run.

2914

"location": "A String", # The resource to read the package from. The supported resource type is:

2915

#

2916

# Google Cloud Storage:

2917

#

2918

# storage.googleapis.com/{bucket}

2919

# bucket.storage.googleapis.com/

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2920

"name": "A String", # The name of the package.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2921

},

2922

],

2923

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2924

"algorithm": "A String", # The algorithm to use for autoscaling.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2925

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2926

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2927

"dataDisks": [ # Data disks that are used by a VM in this workflow.

2928

{ # Describes the data disk used by a workflow job.

2929

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2930

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

2931

# attempt to choose a reasonable default.

2932

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

2933

# must be a disk type appropriate to the project and zone in which

2934

# the workers will run. If unknown or unspecified, the service

2935

# will attempt to choose a reasonable default.

2936

#

2937

# For example, the standard persistent disk type is a resource name

2938

# typically ending in "pd-standard". If SSD persistent disks are

2939

# available, the resource name typically ends with "pd-ssd". The

2940

# actual valid values are defined by the Google Compute Engine API,

2941

# not by the Cloud Dataflow API; consult the Google Compute Engine

2942

# documentation for more information about determining the set of

2943

# available disk types for a particular project and zone.

2944

#

2945

# Google Compute Engine Disk types are local to a particular

2946

# project in a particular zone, and so the resource name will

2947

# typically look something like this:

2948

#

2949

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2950

},

2951

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2952

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

2953

# attempt to choose a reasonable default.

2954

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

2955

# harness, residing in Google Container Registry.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

2956

},

2957

],

2958

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

2959

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

2960

# A description of the user pipeline and stages through which it is executed.

2961

# Created by Cloud Dataflow service. Only retrieved with

2962

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

2963

# form. This data is provided by the Dataflow service for ease of visualizing

2964

# the pipeline and interpreting Dataflow-provided metrics.

2965

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

2966

{ # Description of the type, names/ids, and input/outputs for a transform.

2967

"kind": "A String", # Type of transform.

2968

"name": "A String", # User provided name for this transform instance.

2969

"inputCollectionName": [ # User names for all collection inputs to this transform.

2970

"A String",

2971

],

2972

"displayData": [ # Transform-specific display data.

2973

{ # Data provided with a pipeline or transform to provide descriptive info.

2974

"key": "A String", # The key identifying the display data.

2975

# This is intended to be used as a label for the display data

2976

# when viewed in a dax monitoring system.

2977

"shortStrValue": "A String", # A possible additional shorter value to display.

2978

# For example a java_class_name_value of com.mypackage.MyDoFn

2979

# will be stored with MyDoFn as the short_str_value and

2980

# com.mypackage.MyDoFn as the java_class_name value.

2981

# short_str_value can be displayed and java_class_name_value

2982

# will be displayed as a tooltip.

2983

"timestampValue": "A String", # Contains value if the data is of timestamp type.

2984

"url": "A String", # An optional full URL.

2985

"floatValue": 3.14, # Contains value if the data is of float type.

2986

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

2987

# language namespace (e.g. Python module) which defines the display data.

2988

# This allows a dax monitoring system to specially handle the data

2989

# and perform custom rendering.

2990

"javaClassValue": "A String", # Contains value if the data is of java class type.

2991

"label": "A String", # An optional label to display in a dax UI for the element.

2992

"boolValue": True or False, # Contains value if the data is of a boolean type.

2993

"strValue": "A String", # Contains value if the data is of string type.

2994

"durationValue": "A String", # Contains value if the data is of duration type.

2995

"int64Value": "A String", # Contains value if the data is of int64 type.

2996

},

2997

],

2998

"outputCollectionName": [ # User names for all collection outputs to this transform.

2999

"A String",

3000

],

3001

"id": "A String", # SDK generated id of this transform instance.

3002

},

3003

],

3004

"displayData": [ # Pipeline level display data.

3005

{ # Data provided with a pipeline or transform to provide descriptive info.

3006

"key": "A String", # The key identifying the display data.

3007

# This is intended to be used as a label for the display data

3008

# when viewed in a dax monitoring system.

3009

"shortStrValue": "A String", # A possible additional shorter value to display.

3010

# For example a java_class_name_value of com.mypackage.MyDoFn

3011

# will be stored with MyDoFn as the short_str_value and

3012

# com.mypackage.MyDoFn as the java_class_name value.

3013

# short_str_value can be displayed and java_class_name_value

3014

# will be displayed as a tooltip.

3015

"timestampValue": "A String", # Contains value if the data is of timestamp type.

3016

"url": "A String", # An optional full URL.

3017

"floatValue": 3.14, # Contains value if the data is of float type.

3018

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

3019

# language namespace (e.g. Python module) which defines the display data.

3020

# This allows a dax monitoring system to specially handle the data

3021

# and perform custom rendering.

3022

"javaClassValue": "A String", # Contains value if the data is of java class type.

3023

"label": "A String", # An optional label to display in a dax UI for the element.

3024

"boolValue": True or False, # Contains value if the data is of a boolean type.

3025

"strValue": "A String", # Contains value if the data is of string type.

3026

"durationValue": "A String", # Contains value if the data is of duration type.

3027

"int64Value": "A String", # Contains value if the data is of int64 type.

3028

},

3029

],

3030

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

3031

{ # Description of the composing transforms, names/ids, and input/outputs of a

3032

# stage of execution. Some composing transforms and sources may have been

3033

# generated by the Dataflow service during execution planning.

3034

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

3035

{ # Description of an interstitial value between transforms in an execution

3036

# stage.

3037

"userName": "A String", # Human-readable name for this source; may be user or system generated.

3038

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3039

# source is most closely associated.

3040

"name": "A String", # Dataflow service generated name for this source.

3041

},

3042

],

3043

"kind": "A String", # Type of transform this stage is executing.

3044

"name": "A String", # Dataflow service generated name for this stage.

3045

"outputSource": [ # Output sources for this stage.

3046

{ # Description of an input or output of an execution stage.

3047

"userName": "A String", # Human-readable name for this source; may be user or system generated.

3048

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3049

# source is most closely associated.

3050

"name": "A String", # Dataflow service generated name for this source.

3051

"sizeBytes": "A String", # Size of the source, if measurable.

3052

},

3053

],

3054

"inputSource": [ # Input sources for this stage.

3055

{ # Description of an input or output of an execution stage.

3056

"userName": "A String", # Human-readable name for this source; may be user or system generated.

3057

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

3058

# source is most closely associated.

3059

"name": "A String", # Dataflow service generated name for this source.

3060

"sizeBytes": "A String", # Size of the source, if measurable.

3061

},

3062

],

3063

"componentTransform": [ # Transforms that comprise this execution stage.

3064

{ # Description of a transform executed as part of an execution stage.

3065

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

3066

"originalTransform": "A String", # User name for the original user transform with which this transform is

3067

# most closely associated.

3068

"name": "A String", # Dataflow service generated name for this transform.

3069

},

3070

],

3071

"id": "A String", # Dataflow service generated id for this stage.

3072

},

3073

],

3074

},

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

3075

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

3076

{ # Defines a particular step within a Cloud Dataflow job.

3077

#

3078

# A job consists of multiple steps, each of which performs some

3079

# specific operation as part of the overall job. Data is typically

3080

# passed from one step to another as part of the job.

3081

#

3082

# Here's an example of a sequence of steps which together implement a

3083

# Map-Reduce job:

3084

#

3085

# * Read a collection of data from some source, parsing the

3086

# collection's elements.

3087

#

3088

# * Validate the elements.

3089

#

3090

# * Apply a user-defined function to map each element to some value

3091

# and extract an element-specific key value.

3092

#

3093

# * Group elements with the same key into a single element with

3094

# that key, transforming a multiply-keyed collection into a

3095

# uniquely-keyed collection.

3096

#

3097

# * Write the elements out to some data sink.

3098

#

3099

# Note that the Cloud Dataflow service may be used to run many different

3100

# types of jobs, not just Map-Reduce.

3101

"kind": "A String", # The kind of step in the Cloud Dataflow job.

3102

"properties": { # Named properties associated with the step. Each kind of

3103

# predefined step has its own required set of properties.

3104

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

3105

"a_key": "", # Properties of the object.

3106

},

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

3107

"name": "A String", # The name that identifies the step. This must be unique for each

3108

# step with respect to all other steps in the Cloud Dataflow job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

3109

},

3110

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

3111

"currentState": "A String", # The current state of the job.

3112

#

3113

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

3114

# specified.

3115

#

3116

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

3117

# terminal state. After a job has reached a terminal state, no

3118

# further state updates may be made.

3119

#

3120

# This field may be mutated by the Cloud Dataflow service;

3121

# callers cannot mutate it.

3122

"tempFiles": [ # A set of files the system should be aware of that are used

3123

# for temporary storage. These temporary files will be

3124

# removed on job completion.

3125

# No duplicates are allowed.

3126

# No file patterns are supported.

3127

#

3128

# The supported files are:

3129

#

3130

# Google Cloud Storage:

3131

#

3132

# storage.googleapis.com/{bucket}/{object}

3133

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

3134

"A String",

3135

],

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

3136

"type": "A String", # The type of Cloud Dataflow job.

3137

"id": "A String", # The unique ID of this job.

3138

#

3139

# This field is set by the Cloud Dataflow service when the Job is

3140

# created, and is immutable for the life of the job.

3141

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

3142

# of the job it replaced.

3143

#

3144

# When sending a `CreateJobRequest`, you can update a job by specifying it

3145

# here. The job named here is stopped, and its intermediate state is

3146

# transferred to this job.

3147

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

3148

# isn't contained in the submitted job.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

3149

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

c30d2b5

2017-03-13 12:12:03 -0400

[diff] [blame^]

3150

"a_key": { # Contains information about how a particular

3151

# google.dataflow.v1beta3.Step will be executed.

3152

"stepName": [ # The steps associated with the execution stage.

3153

# Note that stages may have several steps, and that a given step

3154

# might be run by more than one stage.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

"A String",

],

},

},

},

}</pre>

</div>

</body></html>