<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
</p>
<p class="firstline">Returns the messages Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
</p>
<p class="firstline">Returns the workItems Resource.</p>

<p class="toc_element">
  <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
<p class="firstline">Requests the job status.</p>
<p class="toc_element">
  <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
<p class="firstline">Lists the jobs of a project.</p>
<p class="toc_element">
  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
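<p>Below is a brief usage sketch (not part of the generated reference). It assumes the
google-api-python-client discovery flow with application-default credentials; the
project ID and location values are placeholders.</p>
<pre>
from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

# Build the Dataflow service object and navigate to this resource.
credentials = GoogleCredentials.get_application_default()
dataflow = build('dataflow', 'v1b3', credentials=credentials)
jobs = dataflow.projects().locations().jobs()

# List jobs, following pagination with list_next(); the 'jobs' key in the
# response is assumed to hold the Job objects returned by list().
request = jobs.list(projectId='my-project', location='us-central1')
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    request = jobs.list_next(request, response)
</pre>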
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800105<h3>Method Details</h3>
106<div class="method">
107 <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400108 <pre>Creates a Cloud Dataflow job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800109
110Args:
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400111 projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
112 location: string, The location that contains this job. (required)
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800113 body: object, The request body. (required)
114 The object takes the form of:
115
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400116{ # Defines a job to be run by the Cloud Dataflow service.
117 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
118 # If this field is set, the service will ensure its uniqueness.
119 # The request to create a job will fail if the service has knowledge of a
120 # previously submitted job with the same client's ID and job name.
121 # The caller may use this field to ensure idempotence of job
122 # creation across retried attempts to create a job.
123 # By default, the field is empty and, in that case, the service ignores it.
124 "requestedState": "A String", # The job's requested state.
125 #
126 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
127 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
128 # also be used to directly set a job's requested state to
129 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
130 # job if it has not already reached a terminal state.
131 "name": "A String", # The user-specified Cloud Dataflow job name.
132 #
133 # Only one Job with a given name may exist in a project at any
134 # given time. If a caller attempts to create a Job with the same
135 # name as an already-existing Job, the attempt returns the
136 # existing Job.
137 #
138 # The name must match the regular expression
139 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
140 "currentStateTime": "A String", # The timestamp associated with the current state.
141 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
142 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
143 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
144 "labels": { # User-defined labels for this job.
145 #
146 # The labels map can contain no more than 64 entries. Entries of the labels
147 # map are UTF8 strings that comply with the following restrictions:
148 #
149 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
150 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
151 # * Both keys and values are additionally constrained to be <= 128 bytes in
152 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800153 "a_key": "A String",
154 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400155 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
156 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800157 "a_key": "A String",
158 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700159 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
160 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400161 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
162 "version": { # A structure describing which components and their versions of the service
163 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800164 "a_key": "", # Properties of the object.
165 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400166 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
167 # storage. The system will append the suffix "/temp-{JOBNAME} to
168 # this resource prefix, where {JOBNAME} is the value of the
169 # job_name field. The resulting bucket and object prefix is used
170 # as the prefix of the resources used to store temporary data
171 # needed during the job execution. NOTE: This will override the
172 # value in taskrunner_settings.
173 # The supported resource type is:
174 #
175 # Google Cloud Storage:
176 #
177 # storage.googleapis.com/{bucket}/{object}
178 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800179 "internalExperiments": { # Experimental settings.
180 "a_key": "", # Properties of the object. Contains field @type with type URL.
181 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400182 "dataset": "A String", # The dataset for the current project where various workflow
183 # related tables are stored.
184 #
185 # The supported resource type is:
186 #
187 # Google BigQuery:
188 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800189 "experiments": [ # The list of experiments to enable.
190 "A String",
191 ],
192 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400193 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
194 # options are passed through the service and are used to recreate the
195 # SDK pipeline options on the worker in a language agnostic and platform
196 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800197 "a_key": "", # Properties of the object.
198 },
199 "userAgent": { # A description of the process that generated the request.
200 "a_key": "", # Properties of the object.
201 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400202 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
203 # unspecified, the service will attempt to choose a reasonable
204 # default. This should be in the form of the API service name,
205 # e.g. "compute.googleapis.com".
206 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
207 # specified in order for the job to have workers.
208 { # Describes one particular pool of Cloud Dataflow workers to be
209 # instantiated by the Cloud Dataflow service in order to perform the
210 # computations required by a job. Note that a workflow job may use
211 # multiple pools, in order to match the various computational
212 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800213 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400214 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
215 # using the standard Dataflow task runner. Users should ignore
216 # this field.
217 "workflowFileName": "A String", # The file to store the workflow in.
218 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
219 # will not be uploaded.
220 #
221 # The supported resource type is:
222 #
223 # Google Cloud Storage:
224 # storage.googleapis.com/{bucket}/{object}
225 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400226 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
227 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700228 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
229 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
230 "vmId": "A String", # The ID string of the VM.
231 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
232 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400233 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
234 # access the Cloud Dataflow API.
235 "A String",
236 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400237 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
238 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
239 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
240 # "shuffle/v1beta1".
241 "workerId": "A String", # The ID of the worker running this pipeline.
242 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
243 #
244 # When workers access Google Cloud APIs, they logically do so via
245 # relative URLs. If this field is specified, it supplies the base
246 # URL to use for resolving these relative URLs. The normative
247 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
248 # Locators".
249 #
250 # If not specified, the default value is "http://www.googleapis.com/"
251 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
252 # "dataflow/v1b3/projects".
253 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
254 # storage.
255 #
256 # The supported resource type is:
257 #
258 # Google Cloud Storage:
259 #
260 # storage.googleapis.com/{bucket}/{object}
261 # bucket.storage.googleapis.com/{object}
262 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700263 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
264 # taskrunner; e.g. "wheel".
265 "languageHint": "A String", # The suggested backend language.
266 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
267 # console.
268 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
269 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400270 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400271 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
272 #
273 # When workers access Google Cloud APIs, they logically do so via
274 # relative URLs. If this field is specified, it supplies the base
275 # URL to use for resolving these relative URLs. The normative
276 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
277 # Locators".
278 #
279 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700280 "harnessCommand": "A String", # The command to launch the worker harness.
281 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
282 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400283 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700284 # The supported resource type is:
285 #
286 # Google Cloud Storage:
287 # storage.googleapis.com/{bucket}/{object}
288 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800289 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700290 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
291 # are supported.
292 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
293 # service will attempt to choose a reasonable default.
294 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
295 # the service will use the network "default".
296 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
297 # will attempt to choose a reasonable default.
298 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
299 # attempt to choose a reasonable default.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800300 "dataDisks": [ # Data disks that are used by a VM in this workflow.
301 { # Describes the data disk used by a workflow job.
302 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400303 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
304 # attempt to choose a reasonable default.
305 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
306 # must be a disk type appropriate to the project and zone in which
307 # the workers will run. If unknown or unspecified, the service
308 # will attempt to choose a reasonable default.
309 #
310 # For example, the standard persistent disk type is a resource name
311 # typically ending in "pd-standard". If SSD persistent disks are
312 # available, the resource name typically ends with "pd-ssd". The
313 # actual valid values are defined the Google Compute Engine API,
314 # not by the Cloud Dataflow API; consult the Google Compute Engine
315 # documentation for more information about determining the set of
316 # available disk types for a particular project and zone.
317 #
318 # Google Compute Engine Disk types are local to a particular
319 # project in a particular zone, and so the resource name will
320 # typically look something like this:
321 #
322 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800323 },
324 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700325 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
326 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
327 # `TEARDOWN_NEVER`.
328 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
329 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
330 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
331 # down.
332 #
333 # If the workers are not torn down by the service, they will
334 # continue to run and use Google Compute Engine VM resources in the
335 # user's project until they are explicitly terminated by the user.
336 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
337 # policy except for small, manually supervised test jobs.
338 #
339 # If unknown or unspecified, the service will attempt to choose a reasonable
340 # default.
341 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
342 # Compute Engine API.
343 "ipConfiguration": "A String", # Configuration for VM IPs.
344 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
345 # service will choose a number of threads (according to the number of cores
346 # on the selected machine type for batch, or 1 by convention for streaming).
347 "poolArgs": { # Extra arguments for this worker pool.
348 "a_key": "", # Properties of the object. Contains field @type with type URL.
349 },
350 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
351 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400352 # attempt to choose a reasonable default.
353 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
354 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700355 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
356 # the form "regions/REGION/subnetworks/SUBNETWORK".
357 "packages": [ # Packages to be installed on workers.
358 { # The packages that must be installed in order for a worker to run the
359 # steps of the Cloud Dataflow job that will be assigned to its worker
360 # pool.
361 #
362 # This is the mechanism by which the Cloud Dataflow SDK causes code to
363 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
364 # might use this to install jars containing the user's code and all of the
365 # various dependencies (libraries, data files, etc.) required in order
366 # for that code to run.
367 "location": "A String", # The resource to read the package from. The supported resource type is:
368 #
369 # Google Cloud Storage:
370 #
371 # storage.googleapis.com/{bucket}
372 # bucket.storage.googleapis.com/
373 "name": "A String", # The name of the package.
374 },
375 ],
376 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
377 "algorithm": "A String", # The algorithm to use for autoscaling.
378 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
379 },
380 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
381 # select a default set of packages which are useful to worker
382 # harnesses written in a particular language.
383 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
384 # attempt to choose a reasonable default.
385 "metadata": { # Metadata to set on the Google Compute Engine VMs.
386 "a_key": "A String",
387 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800388 },
389 ],
390 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700391 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
392 # of the job it replaced.
393 #
394 # When sending a `CreateJobRequest`, you can update a job by specifying it
395 # here. The job named here is stopped, and its intermediate state is
396 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400397 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
398 # A description of the user pipeline and stages through which it is executed.
399 # Created by Cloud Dataflow service. Only retrieved with
400 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
401 # form. This data is provided by the Dataflow service for ease of visualizing
402 # the pipeline and interpretting Dataflow provided metrics.
403 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
404 { # Description of the type, names/ids, and input/outputs for a transform.
405 "kind": "A String", # Type of transform.
406 "name": "A String", # User provided name for this transform instance.
407 "inputCollectionName": [ # User names for all collection inputs to this transform.
408 "A String",
409 ],
410 "displayData": [ # Transform-specific display data.
411 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400412 "shortStrValue": "A String", # A possible additional shorter value to display.
413 # For example a java_class_name_value of com.mypackage.MyDoFn
414 # will be stored with MyDoFn as the short_str_value and
415 # com.mypackage.MyDoFn as the java_class_name value.
416 # short_str_value can be displayed and java_class_name_value
417 # will be displayed as a tooltip.
418 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700419 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400420 "url": "A String", # An optional full URL.
421 "floatValue": 3.14, # Contains value if the data is of float type.
422 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
423 # language namespace (i.e. python module) which defines the display data.
424 # This allows a dax monitoring system to specially handle the data
425 # and perform custom rendering.
426 "javaClassValue": "A String", # Contains value if the data is of java class type.
427 "label": "A String", # An optional label to display in a dax UI for the element.
428 "boolValue": True or False, # Contains value if the data is of a boolean type.
429 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700430 "key": "A String", # The key identifying the display data.
431 # This is intended to be used as a label for the display data
432 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400433 "int64Value": "A String", # Contains value if the data is of int64 type.
434 },
435 ],
436 "outputCollectionName": [ # User names for all collection outputs to this transform.
437 "A String",
438 ],
439 "id": "A String", # SDK generated id of this transform instance.
440 },
441 ],
442 "displayData": [ # Pipeline level display data.
443 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400444 "shortStrValue": "A String", # A possible additional shorter value to display.
445 # For example a java_class_name_value of com.mypackage.MyDoFn
446 # will be stored with MyDoFn as the short_str_value and
447 # com.mypackage.MyDoFn as the java_class_name value.
448 # short_str_value can be displayed and java_class_name_value
449 # will be displayed as a tooltip.
450 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700451 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400452 "url": "A String", # An optional full URL.
453 "floatValue": 3.14, # Contains value if the data is of float type.
454 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
455 # language namespace (i.e. python module) which defines the display data.
456 # This allows a dax monitoring system to specially handle the data
457 # and perform custom rendering.
458 "javaClassValue": "A String", # Contains value if the data is of java class type.
459 "label": "A String", # An optional label to display in a dax UI for the element.
460 "boolValue": True or False, # Contains value if the data is of a boolean type.
461 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700462 "key": "A String", # The key identifying the display data.
463 # This is intended to be used as a label for the display data
464 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400465 "int64Value": "A String", # Contains value if the data is of int64 type.
466 },
467 ],
468 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
469 { # Description of the composing transforms, names/ids, and input/outputs of a
470 # stage of execution. Some composing transforms and sources may have been
471 # generated by the Dataflow service during execution planning.
472 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
473 { # Description of an interstitial value between transforms in an execution
474 # stage.
475 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
476 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
477 # source is most closely associated.
478 "name": "A String", # Dataflow service generated name for this source.
479 },
480 ],
481 "kind": "A String", # Type of tranform this stage is executing.
482 "name": "A String", # Dataflow service generated name for this stage.
483 "outputSource": [ # Output sources for this stage.
484 { # Description of an input or output of an execution stage.
485 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700486 "sizeBytes": "A String", # Size of the source, if measurable.
487 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400488 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
489 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400490 },
491 ],
492 "inputSource": [ # Input sources for this stage.
493 { # Description of an input or output of an execution stage.
494 "userName": "A String", # Human-readable name for this source; may be user or system generated.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700495 "sizeBytes": "A String", # Size of the source, if measurable.
496 "name": "A String", # Dataflow service generated name for this source.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400497 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498 # source is most closely associated.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400499 },
500 ],
501 "componentTransform": [ # Transforms that comprise this execution stage.
502 { # Description of a transform executed as part of an execution stage.
503 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
504 "originalTransform": "A String", # User name for the original user transform with which this transform is
505 # most closely associated.
506 "name": "A String", # Dataflow service generated name for this source.
507 },
508 ],
509 "id": "A String", # Dataflow service generated id for this stage.
510 },
511 ],
512 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800513 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400514 { # Defines a particular step within a Cloud Dataflow job.
515 #
516 # A job consists of multiple steps, each of which performs some
517 # specific operation as part of the overall job. Data is typically
518 # passed from one step to another as part of the job.
519 #
520 # Here's an example of a sequence of steps which together implement a
521 # Map-Reduce job:
522 #
523 # * Read a collection of data from some source, parsing the
524 # collection's elements.
525 #
526 # * Validate the elements.
527 #
528 # * Apply a user-defined function to map each element to some value
529 # and extract an element-specific key value.
530 #
531 # * Group elements with the same key into a single element with
532 # that key, transforming a multiply-keyed collection into a
533 # uniquely-keyed collection.
534 #
535 # * Write the elements out to some data sink.
536 #
537 # Note that the Cloud Dataflow service may be used to run many different
538 # types of jobs, not just Map-Reduce.
539 "kind": "A String", # The kind of step in the Cloud Dataflow job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700540 "name": "A String", # The name that identifies the step. This must be unique for each
541 # step with respect to all other steps in the Cloud Dataflow job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400542 "properties": { # Named properties associated with the step. Each kind of
543 # predefined step has its own required set of properties.
544 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800545 "a_key": "", # Properties of the object.
546 },
547 },
548 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700549 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400550 "tempFiles": [ # A set of files the system should be aware of that are used
551 # for temporary storage. These temporary files will be
552 # removed on job completion.
553 # No duplicates are allowed.
554 # No file patterns are supported.
555 #
556 # The supported files are:
557 #
558 # Google Cloud Storage:
559 #
560 # storage.googleapis.com/{bucket}/{object}
561 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800562 "A String",
563 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400564 "type": "A String", # The type of Cloud Dataflow job.
565 "id": "A String", # The unique ID of this job.
566 #
567 # This field is set by the Cloud Dataflow service when the Job is
568 # created, and is immutable for the life of the job.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700569 "currentState": "A String", # The current state of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400570 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700571 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
572 # specified.
573 #
574 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
575 # terminal state. After a job has reached a terminal state, no
576 # further state updates may be made.
577 #
578 # This field may be mutated by the Cloud Dataflow service;
579 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400580 "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
581 # isn't contained in the submitted job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800582 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400583 "a_key": { # Contains information about how a particular
584 # google.dataflow.v1beta3.Step will be executed.
585 "stepName": [ # The steps associated with the execution stage.
586 # Note that stages may have several steps, and that a given step
587 # might be run by more than one stage.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800588 "A String",
589 ],
590 },
591 },
592 },
593 }
594
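  For illustration only, a minimal body sketch with hypothetical values (most
  fields above are optional and defaulted by the service; "JOB_TYPE_BATCH" is
  assumed here to be an accepted job type):

    body = {
      "name": "example-batch-job",
      "type": "JOB_TYPE_BATCH",
      "environment": {
        "tempStoragePrefix": "storage.googleapis.com/example-bucket/temp",
      },
    }
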
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  replaceJobId: string, Deprecated. This field is now in the Job message.
  view: string, The level of information requested in response.

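  Example call (a sketch; assumes `jobs` is the projects().locations().jobs()
  resource of a built service, as in the snippet above, and `body` is a Job
  dict like the sketch in Args):

    job = jobs.create(projectId='my-project', location='us-central1',
                      body=body).execute()
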
602Returns:
603 An object of the form:
604
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400605 { # Defines a job to be run by the Cloud Dataflow service.
606 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
607 # If this field is set, the service will ensure its uniqueness.
608 # The request to create a job will fail if the service has knowledge of a
609 # previously submitted job with the same client's ID and job name.
610 # The caller may use this field to ensure idempotence of job
611 # creation across retried attempts to create a job.
612 # By default, the field is empty and, in that case, the service ignores it.
613 "requestedState": "A String", # The job's requested state.
614 #
615 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
616 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
617 # also be used to directly set a job's requested state to
618 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
619 # job if it has not already reached a terminal state.
620 "name": "A String", # The user-specified Cloud Dataflow job name.
621 #
622 # Only one Job with a given name may exist in a project at any
623 # given time. If a caller attempts to create a Job with the same
624 # name as an already-existing Job, the attempt returns the
625 # existing Job.
626 #
627 # The name must match the regular expression
628 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
629 "currentStateTime": "A String", # The timestamp associated with the current state.
630 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
631 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
632 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
633 "labels": { # User-defined labels for this job.
634 #
635 # The labels map can contain no more than 64 entries. Entries of the labels
636 # map are UTF8 strings that comply with the following restrictions:
637 #
638 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
639 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
640 # * Both keys and values are additionally constrained to be <= 128 bytes in
641 # size.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800642 "a_key": "A String",
643 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400644 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
645 # corresponding name prefixes of the new job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800646 "a_key": "A String",
647 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700648 "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
649 # Cloud Dataflow service.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400650 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
651 "version": { # A structure describing which components and their versions of the service
652 # are required in order to run the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800653 "a_key": "", # Properties of the object.
654 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400655 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
656 # storage. The system will append the suffix "/temp-{JOBNAME} to
657 # this resource prefix, where {JOBNAME} is the value of the
658 # job_name field. The resulting bucket and object prefix is used
659 # as the prefix of the resources used to store temporary data
660 # needed during the job execution. NOTE: This will override the
661 # value in taskrunner_settings.
662 # The supported resource type is:
663 #
664 # Google Cloud Storage:
665 #
666 # storage.googleapis.com/{bucket}/{object}
667 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800668 "internalExperiments": { # Experimental settings.
669 "a_key": "", # Properties of the object. Contains field @type with type URL.
670 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400671 "dataset": "A String", # The dataset for the current project where various workflow
672 # related tables are stored.
673 #
674 # The supported resource type is:
675 #
676 # Google BigQuery:
677 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800678 "experiments": [ # The list of experiments to enable.
679 "A String",
680 ],
681 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400682 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
683 # options are passed through the service and are used to recreate the
684 # SDK pipeline options on the worker in a language agnostic and platform
685 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800686 "a_key": "", # Properties of the object.
687 },
688 "userAgent": { # A description of the process that generated the request.
689 "a_key": "", # Properties of the object.
690 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400691 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
692 # unspecified, the service will attempt to choose a reasonable
693 # default. This should be in the form of the API service name,
694 # e.g. "compute.googleapis.com".
695 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
696 # specified in order for the job to have workers.
697 { # Describes one particular pool of Cloud Dataflow workers to be
698 # instantiated by the Cloud Dataflow service in order to perform the
699 # computations required by a job. Note that a workflow job may use
700 # multiple pools, in order to match the various computational
701 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800702 "diskSourceImage": "A String", # Fully qualified source image for disks.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400703 "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
704 # using the standard Dataflow task runner. Users should ignore
705 # this field.
706 "workflowFileName": "A String", # The file to store the workflow in.
707 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
708 # will not be uploaded.
709 #
710 # The supported resource type is:
711 #
712 # Google Cloud Storage:
713 # storage.googleapis.com/{bucket}/{object}
714 # bucket.storage.googleapis.com/{object}
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400715 "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
716 # taskrunner; e.g. "root".
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700717 "commandlinesFileName": "A String", # The file to store preprocessing commands in.
718 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
719 "vmId": "A String", # The ID string of the VM.
720 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
721 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400722 "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
723 # access the Cloud Dataflow API.
724 "A String",
725 ],
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400726 "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
727 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
728 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
729 # "shuffle/v1beta1".
730 "workerId": "A String", # The ID of the worker running this pipeline.
731 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
732 #
733 # When workers access Google Cloud APIs, they logically do so via
734 # relative URLs. If this field is specified, it supplies the base
735 # URL to use for resolving these relative URLs. The normative
736 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
737 # Locators".
738 #
739 # If not specified, the default value is "http://www.googleapis.com/"
740 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
741 # "dataflow/v1b3/projects".
742 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
743 # storage.
744 #
745 # The supported resource type is:
746 #
747 # Google Cloud Storage:
748 #
749 # storage.googleapis.com/{bucket}/{object}
750 # bucket.storage.googleapis.com/{object}
751 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700752 "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
753 # taskrunner; e.g. "wheel".
754 "languageHint": "A String", # The suggested backend language.
755 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
756 # console.
757 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
758 "logDir": "A String", # The directory on the VM to store logs.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400759 "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400760 "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
761 #
762 # When workers access Google Cloud APIs, they logically do so via
763 # relative URLs. If this field is specified, it supplies the base
764 # URL to use for resolving these relative URLs. The normative
765 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
766 # Locators".
767 #
768 # If not specified, the default value is "http://www.googleapis.com/"
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700769 "harnessCommand": "A String", # The command to launch the worker harness.
770 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
771 # temporary storage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400772 #
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700773 # The supported resource type is:
774 #
775 # Google Cloud Storage:
776 # storage.googleapis.com/{bucket}/{object}
777 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800778 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700779 "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
780 # are supported.
781 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
782 # service will attempt to choose a reasonable default.
783 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
784 # the service will use the network "default".
785 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
786 # will attempt to choose a reasonable default.
787 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
788 # attempt to choose a reasonable default.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800789 "dataDisks": [ # Data disks that are used by a VM in this workflow.
790 { # Describes the data disk used by a workflow job.
791 "mountPoint": "A String", # Directory in a VM where disk is mounted.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400792 "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
793 # attempt to choose a reasonable default.
794 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
795 # must be a disk type appropriate to the project and zone in which
796 # the workers will run. If unknown or unspecified, the service
797 # will attempt to choose a reasonable default.
798 #
799 # For example, the standard persistent disk type is a resource name
800 # typically ending in "pd-standard". If SSD persistent disks are
801 # available, the resource name typically ends with "pd-ssd". The
802 # actual valid values are defined the Google Compute Engine API,
803 # not by the Cloud Dataflow API; consult the Google Compute Engine
804 # documentation for more information about determining the set of
805 # available disk types for a particular project and zone.
806 #
807 # Google Compute Engine Disk types are local to a particular
808 # project in a particular zone, and so the resource name will
809 # typically look something like this:
810 #
811 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800812 },
813 ],
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700814 "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
815 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
816 # `TEARDOWN_NEVER`.
817 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
818 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
819 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
820 # down.
821 #
822 # If the workers are not torn down by the service, they will
823 # continue to run and use Google Compute Engine VM resources in the
824 # user's project until they are explicitly terminated by the user.
825 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
826 # policy except for small, manually supervised test jobs.
827 #
828 # If unknown or unspecified, the service will attempt to choose a reasonable
829 # default.
830 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
831 # Compute Engine API.
832 "ipConfiguration": "A String", # Configuration for VM IPs.
833 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
834 # service will choose a number of threads (according to the number of cores
835 # on the selected machine type for batch, or 1 by convention for streaming).
836 "poolArgs": { # Extra arguments for this worker pool.
837 "a_key": "", # Properties of the object. Contains field @type with type URL.
838 },
839 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
840 # execute the job. If zero or unspecified, the service will
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400841 # attempt to choose a reasonable default.
842 "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
843 # harness, residing in Google Container Registry.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700844 "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
845 # the form "regions/REGION/subnetworks/SUBNETWORK".
846 "packages": [ # Packages to be installed on workers.
847 { # The packages that must be installed in order for a worker to run the
848 # steps of the Cloud Dataflow job that will be assigned to its worker
849 # pool.
850 #
851 # This is the mechanism by which the Cloud Dataflow SDK causes code to
852 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
853 # might use this to install jars containing the user's code and all of the
854 # various dependencies (libraries, data files, etc.) required in order
855 # for that code to run.
856 "location": "A String", # The resource to read the package from. The supported resource type is:
857 #
858 # Google Cloud Storage:
859 #
860 # storage.googleapis.com/{bucket}
861 # bucket.storage.googleapis.com/
862 "name": "A String", # The name of the package.
863 },
864 ],
865 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
866 "algorithm": "A String", # The algorithm to use for autoscaling.
867 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
868 },
869 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
870 # select a default set of packages which are useful to worker
871 # harnesses written in a particular language.
872 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
873 # attempt to choose a reasonable default.
874 "metadata": { # Metadata to set on the Google Compute Engine VMs.
875 "a_key": "A String",
876 },
Jon Wayne Parrott692617a2017-01-06 09:58:29 -0800877 },
878 ],
879 },
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700880 "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
881 # of the job it replaced.
882 #
883 # When sending a `CreateJobRequest`, you can update a job by specifying it
884 # here. The job named here is stopped, and its intermediate state is
885 # transferred to this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400886 "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
887 # A description of the user pipeline and stages through which it is executed.
888 # Created by Cloud Dataflow service. Only retrieved with
889 # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
890 # form. This data is provided by the Dataflow service for ease of visualizing
891 # the pipeline and interpretting Dataflow provided metrics.
892 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
893 { # Description of the type, names/ids, and input/outputs for a transform.
894 "kind": "A String", # Type of transform.
895 "name": "A String", # User provided name for this transform instance.
896 "inputCollectionName": [ # User names for all collection inputs to this transform.
897 "A String",
898 ],
899 "displayData": [ # Transform-specific display data.
900 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400901 "shortStrValue": "A String", # A possible additional shorter value to display.
902 # For example a java_class_name_value of com.mypackage.MyDoFn
903 # will be stored with MyDoFn as the short_str_value and
904 # com.mypackage.MyDoFn as the java_class_name value.
905 # short_str_value can be displayed and java_class_name_value
906 # will be displayed as a tooltip.
907 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700908 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400909 "url": "A String", # An optional full URL.
910 "floatValue": 3.14, # Contains value if the data is of float type.
911 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
912 # language namespace (i.e. python module) which defines the display data.
913 # This allows a dax monitoring system to specially handle the data
914 # and perform custom rendering.
915 "javaClassValue": "A String", # Contains value if the data is of java class type.
916 "label": "A String", # An optional label to display in a dax UI for the element.
917 "boolValue": True or False, # Contains value if the data is of a boolean type.
918 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700919 "key": "A String", # The key identifying the display data.
920 # This is intended to be used as a label for the display data
921 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400922 "int64Value": "A String", # Contains value if the data is of int64 type.
923 },
924 ],
925 "outputCollectionName": [ # User names for all collection outputs to this transform.
926 "A String",
927 ],
928 "id": "A String", # SDK generated id of this transform instance.
929 },
930 ],
931 "displayData": [ # Pipeline level display data.
932 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400933 "shortStrValue": "A String", # A possible additional shorter value to display.
934 # For example a java_class_name_value of com.mypackage.MyDoFn
935 # will be stored with MyDoFn as the short_str_value and
936 # com.mypackage.MyDoFn as the java_class_name value.
937 # short_str_value can be displayed and java_class_name_value
938 # will be displayed as a tooltip.
939 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700940 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400941 "url": "A String", # An optional full URL.
942 "floatValue": 3.14, # Contains value if the data is of float type.
943 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
944 # language namespace (i.e. python module) which defines the display data.
945 # This allows a dax monitoring system to specially handle the data
946 # and perform custom rendering.
947 "javaClassValue": "A String", # Contains value if the data is of java class type.
948 "label": "A String", # An optional label to display in a dax UI for the element.
949 "boolValue": True or False, # Contains value if the data is of a boolean type.
950 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapatie833b792017-03-24 15:06:46 -0700951 "key": "A String", # The key identifying the display data.
952 # This is intended to be used as a label for the display data
953 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400954 "int64Value": "A String", # Contains value if the data is of int64 type.
955 },
956 ],
957 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
958 { # Description of the composing transforms, names/ids, and input/outputs of a
959 # stage of execution. Some composing transforms and sources may have been
960 # generated by the Dataflow service during execution planning.
961 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
962 { # Description of an interstitial value between transforms in an execution
963 # stage.
964 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
965 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
966 # source is most closely associated.
967 "name": "A String", # Dataflow service generated name for this source.
968 },
969 ],
970 "kind": "A String", # Type of tranform this stage is executing.
971 "name": "A String", # Dataflow service generated name for this stage.
972 "outputSource": [ # Output sources for this stage.
973 { # Description of an input or output of an execution stage.
974 "userName": "A String", # Human-readable name for this source; may be user or system generated.
975               "sizeBytes": "A String", # Size of the source, if measurable.
976               "name": "A String", # Dataflow service generated name for this source.
977               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
978                   # source is most closely associated.
979             },
980 ],
981 "inputSource": [ # Input sources for this stage.
982 { # Description of an input or output of an execution stage.
983 "userName": "A String", # Human-readable name for this source; may be user or system generated.
984               "sizeBytes": "A String", # Size of the source, if measurable.
985               "name": "A String", # Dataflow service generated name for this source.
986               "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
987                   # source is most closely associated.
988             },
989 ],
990 "componentTransform": [ # Transforms that comprise this execution stage.
991 { # Description of a transform executed as part of an execution stage.
992 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
993 "originalTransform": "A String", # User name for the original user transform with which this transform is
994 # most closely associated.
995 "name": "A String", # Dataflow service generated name for this source.
996 },
997 ],
998 "id": "A String", # Dataflow service generated id for this stage.
999 },
1000 ],
1001 },
1002   "steps": [ # The top-level steps that constitute the entire job.
1003     { # Defines a particular step within a Cloud Dataflow job.
1004 #
1005 # A job consists of multiple steps, each of which performs some
1006 # specific operation as part of the overall job. Data is typically
1007 # passed from one step to another as part of the job.
1008 #
1009 # Here's an example of a sequence of steps which together implement a
1010 # Map-Reduce job:
1011 #
1012 # * Read a collection of data from some source, parsing the
1013 # collection's elements.
1014 #
1015 # * Validate the elements.
1016 #
1017 # * Apply a user-defined function to map each element to some value
1018 # and extract an element-specific key value.
1019 #
1020 # * Group elements with the same key into a single element with
1021 # that key, transforming a multiply-keyed collection into a
1022 # uniquely-keyed collection.
1023 #
1024 # * Write the elements out to some data sink.
1025 #
1026 # Note that the Cloud Dataflow service may be used to run many different
1027 # types of jobs, not just Map-Reduce.
1028 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1029       "name": "A String", # The name that identifies the step. This must be unique for each
1030           # step with respect to all other steps in the Cloud Dataflow job.
1031       "properties": { # Named properties associated with the step. Each kind of
1032 # predefined step has its own required set of properties.
1033 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1034         "a_key": "", # Properties of the object.
1035 },
1036 },
1037 ],
1038   "location": "A String", # The location that contains this job.
1039   "tempFiles": [ # A set of files the system should be aware of that are used
1040 # for temporary storage. These temporary files will be
1041 # removed on job completion.
1042 # No duplicates are allowed.
1043 # No file patterns are supported.
1044 #
1045 # The supported files are:
1046 #
1047 # Google Cloud Storage:
1048 #
1049 # storage.googleapis.com/{bucket}/{object}
1050 # bucket.storage.googleapis.com/{object}
1051     "A String",
1052   ],
1053   "type": "A String", # The type of Cloud Dataflow job.
1054 "id": "A String", # The unique ID of this job.
1055 #
1056 # This field is set by the Cloud Dataflow service when the Job is
1057 # created, and is immutable for the life of the job.
1058   "currentState": "A String", # The current state of the job.
1059       #
1060       # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1061 # specified.
1062 #
1063 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1064 # terminal state. After a job has reached a terminal state, no
1065 # further state updates may be made.
1066 #
1067 # This field may be mutated by the Cloud Dataflow service;
1068 # callers cannot mutate it.
1069   "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1070       # isn't contained in the submitted job.
1071     "stages": { # A mapping from each stage to the information about that stage.
1072       "a_key": { # Contains information about how a particular
1073 # google.dataflow.v1beta3.Step will be executed.
1074 "stepName": [ # The steps associated with the execution stage.
1075 # Note that stages may have several steps, and that a given step
1076 # might be run by more than one stage.
1077             "A String",
1078 ],
1079 },
1080 },
1081 },
1082 }</pre>
1083</div>
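<p>As a usage illustration, the following is a minimal sketch of calling <code>create</code> through the Python client library. It assumes <code>google-api-python-client</code> is installed and Application Default Credentials are available; the project ID, location, and job body shown are hypothetical placeholders, and real job bodies are normally produced by a Dataflow SDK runner rather than written by hand.</p>
<pre>
from googleapiclient.discovery import build

# Build a Dataflow API client; credentials are resolved from the environment.
service = build('dataflow', 'v1b3')

# Hypothetical minimal job body; see the request body schema above.
job_body = {
    'name': 'example-batch-job',  # must match [a-z]([-a-z0-9]{0,38}[a-z0-9])?
    'type': 'JOB_TYPE_BATCH',
}

response = service.projects().locations().jobs().create(
    projectId='my-project',    # placeholder project ID
    location='us-central1',    # placeholder location
    body=job_body,
).execute()
print(response['id'])  # service-assigned, immutable job ID
</pre>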
1084
1085<div class="method">
1086 <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
1087  <pre>Gets the state of the specified Cloud Dataflow job.
1088
1089Args:
1090  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1091 location: string, The location that contains this job. (required)
1092 jobId: string, The job ID. (required)
1093  x__xgafv: string, V1 error format.
1094    Allowed values
1095 1 - v1 error format
1096 2 - v2 error format
1097 view: string, The level of information requested in response.
1098
1099Returns:
1100 An object of the form:
1101
1102     { # Defines a job to be run by the Cloud Dataflow service.
1103 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1104 # If this field is set, the service will ensure its uniqueness.
1105 # The request to create a job will fail if the service has knowledge of a
1106 # previously submitted job with the same client's ID and job name.
1107 # The caller may use this field to ensure idempotence of job
1108 # creation across retried attempts to create a job.
1109 # By default, the field is empty and, in that case, the service ignores it.
1110 "requestedState": "A String", # The job's requested state.
1111 #
1112 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1113 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1114 # also be used to directly set a job's requested state to
1115 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1116 # job if it has not already reached a terminal state.
1117 "name": "A String", # The user-specified Cloud Dataflow job name.
1118 #
1119 # Only one Job with a given name may exist in a project at any
1120 # given time. If a caller attempts to create a Job with the same
1121 # name as an already-existing Job, the attempt returns the
1122 # existing Job.
1123 #
1124 # The name must match the regular expression
1125 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1126 "currentStateTime": "A String", # The timestamp associated with the current state.
1127 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1128 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1129 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1130 "labels": { # User-defined labels for this job.
1131 #
1132 # The labels map can contain no more than 64 entries. Entries of the labels
1133 # map are UTF8 strings that comply with the following restrictions:
1134 #
1135 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1136 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1137 # * Both keys and values are additionally constrained to be <= 128 bytes in
1138 # size.
1139     "a_key": "A String",
1140   },
1141   "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1142       # corresponding name prefixes of the new job.
1143     "a_key": "A String",
1144 },
1145   "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1146       # Cloud Dataflow service.
1147   "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1148 "version": { # A structure describing which components and their versions of the service
1149 # are required in order to run the job.
1150       "a_key": "", # Properties of the object.
1151     },
1152     "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1153         # storage. The system will append the suffix "/temp-{JOBNAME}" to
1154 # this resource prefix, where {JOBNAME} is the value of the
1155 # job_name field. The resulting bucket and object prefix is used
1156 # as the prefix of the resources used to store temporary data
1157 # needed during the job execution. NOTE: This will override the
1158 # value in taskrunner_settings.
1159 # The supported resource type is:
1160 #
1161 # Google Cloud Storage:
1162 #
1163 # storage.googleapis.com/{bucket}/{object}
1164 # bucket.storage.googleapis.com/{object}
1165     "internalExperiments": { # Experimental settings.
1166       "a_key": "", # Properties of the object. Contains field @type with type URL.
1167     },
1168     "dataset": "A String", # The dataset for the current project where various workflow
1169 # related tables are stored.
1170 #
1171 # The supported resource type is:
1172 #
1173 # Google BigQuery:
1174 # bigquery.googleapis.com/{dataset}
1175     "experiments": [ # The list of experiments to enable.
1176 "A String",
1177 ],
1178 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1179     "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1180 # options are passed through the service and are used to recreate the
1181 # SDK pipeline options on the worker in a language agnostic and platform
1182 # independent way.
1183       "a_key": "", # Properties of the object.
1184 },
1185 "userAgent": { # A description of the process that generated the request.
1186 "a_key": "", # Properties of the object.
1187 },
1188     "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1189 # unspecified, the service will attempt to choose a reasonable
1190 # default. This should be in the form of the API service name,
1191 # e.g. "compute.googleapis.com".
1192 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1193 # specified in order for the job to have workers.
1194 { # Describes one particular pool of Cloud Dataflow workers to be
1195 # instantiated by the Cloud Dataflow service in order to perform the
1196 # computations required by a job. Note that a workflow job may use
1197 # multiple pools, in order to match the various computational
1198 # requirements of the various stages of the job.
1199         "diskSourceImage": "A String", # Fully qualified source image for disks.
1200         "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1201 # using the standard Dataflow task runner. Users should ignore
1202 # this field.
1203 "workflowFileName": "A String", # The file to store the workflow in.
1204 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1205 # will not be uploaded.
1206 #
1207 # The supported resource type is:
1208 #
1209 # Google Cloud Storage:
1210 # storage.googleapis.com/{bucket}/{object}
1211 # bucket.storage.googleapis.com/{object}
1212           "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1213               # taskrunner; e.g. "root".
1214           "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1215 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1216 "vmId": "A String", # The ID string of the VM.
1217 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1218 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1219           "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1220 # access the Cloud Dataflow API.
1221 "A String",
1222 ],
1223           "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1224 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1225 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1226 # "shuffle/v1beta1".
1227 "workerId": "A String", # The ID of the worker running this pipeline.
1228 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1229 #
1230 # When workers access Google Cloud APIs, they logically do so via
1231 # relative URLs. If this field is specified, it supplies the base
1232 # URL to use for resolving these relative URLs. The normative
1233 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1234 # Locators".
1235 #
1236 # If not specified, the default value is "http://www.googleapis.com/"
1237 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1238 # "dataflow/v1b3/projects".
1239 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1240 # storage.
1241 #
1242 # The supported resource type is:
1243 #
1244 # Google Cloud Storage:
1245 #
1246 # storage.googleapis.com/{bucket}/{object}
1247 # bucket.storage.googleapis.com/{object}
1248 },
1249           "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1250 # taskrunner; e.g. "wheel".
1251 "languageHint": "A String", # The suggested backend language.
1252 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1253 # console.
1254 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1255 "logDir": "A String", # The directory on the VM to store logs.
1256           "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1257           "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1258 #
1259 # When workers access Google Cloud APIs, they logically do so via
1260 # relative URLs. If this field is specified, it supplies the base
1261 # URL to use for resolving these relative URLs. The normative
1262 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1263 # Locators".
1264 #
1265 # If not specified, the default value is "http://www.googleapis.com/"
1266           "harnessCommand": "A String", # The command to launch the worker harness.
1267 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1268 # temporary storage.
1269               #
1270               # The supported resource type is:
1271 #
1272 # Google Cloud Storage:
1273 # storage.googleapis.com/{bucket}/{object}
1274 # bucket.storage.googleapis.com/{object}
1275         },
1276         "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1277 # are supported.
1278 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1279 # service will attempt to choose a reasonable default.
1280 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1281 # the service will use the network "default".
1282 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1283 # will attempt to choose a reasonable default.
1284 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1285 # attempt to choose a reasonable default.
1286         "dataDisks": [ # Data disks that are used by a VM in this workflow.
1287 { # Describes the data disk used by a workflow job.
1288 "mountPoint": "A String", # Directory in a VM where disk is mounted.
1289             "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1290 # attempt to choose a reasonable default.
1291 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1292 # must be a disk type appropriate to the project and zone in which
1293 # the workers will run. If unknown or unspecified, the service
1294 # will attempt to choose a reasonable default.
1295 #
1296 # For example, the standard persistent disk type is a resource name
1297 # typically ending in "pd-standard". If SSD persistent disks are
1298 # available, the resource name typically ends with "pd-ssd". The
1299                 # actual valid values are defined by the Google Compute Engine API,
1300 # not by the Cloud Dataflow API; consult the Google Compute Engine
1301 # documentation for more information about determining the set of
1302 # available disk types for a particular project and zone.
1303 #
1304 # Google Compute Engine Disk types are local to a particular
1305 # project in a particular zone, and so the resource name will
1306 # typically look something like this:
1307 #
1308 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1309           },
1310         ],
1311         "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1312 # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1313 # `TEARDOWN_NEVER`.
1314 # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1315 # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1316 # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1317 # down.
1318 #
1319 # If the workers are not torn down by the service, they will
1320 # continue to run and use Google Compute Engine VM resources in the
1321 # user's project until they are explicitly terminated by the user.
1322 # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1323 # policy except for small, manually supervised test jobs.
1324 #
1325 # If unknown or unspecified, the service will attempt to choose a reasonable
1326 # default.
1327 "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1328 # Compute Engine API.
1329 "ipConfiguration": "A String", # Configuration for VM IPs.
1330 "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1331 # service will choose a number of threads (according to the number of cores
1332 # on the selected machine type for batch, or 1 by convention for streaming).
1333 "poolArgs": { # Extra arguments for this worker pool.
1334 "a_key": "", # Properties of the object. Contains field @type with type URL.
1335 },
1336 "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1337 # execute the job. If zero or unspecified, the service will
1338             # attempt to choose a reasonable default.
1339         "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1340             # harness, residing in Google Container Registry.
1341         "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
1342 # the form "regions/REGION/subnetworks/SUBNETWORK".
1343 "packages": [ # Packages to be installed on workers.
1344 { # The packages that must be installed in order for a worker to run the
1345 # steps of the Cloud Dataflow job that will be assigned to its worker
1346 # pool.
1347 #
1348 # This is the mechanism by which the Cloud Dataflow SDK causes code to
1349 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1350 # might use this to install jars containing the user's code and all of the
1351 # various dependencies (libraries, data files, etc.) required in order
1352 # for that code to run.
1353 "location": "A String", # The resource to read the package from. The supported resource type is:
1354 #
1355 # Google Cloud Storage:
1356 #
1357 # storage.googleapis.com/{bucket}
1358 # bucket.storage.googleapis.com/
1359 "name": "A String", # The name of the package.
1360 },
1361 ],
1362 "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1363 "algorithm": "A String", # The algorithm to use for autoscaling.
1364 "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1365 },
1366 "defaultPackageSet": "A String", # The default package set to install. This allows the service to
1367 # select a default set of packages which are useful to worker
1368 # harnesses written in a particular language.
1369 "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
1370 # attempt to choose a reasonable default.
1371 "metadata": { # Metadata to set on the Google Compute Engine VMs.
1372 "a_key": "A String",
1373 },
1374       },
1375     ],
1376   },
1377   "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1378 # of the job it replaced.
1379 #
1380 # When sending a `CreateJobRequest`, you can update a job by specifying it
1381 # here. The job named here is stopped, and its intermediate state is
1382 # transferred to this job.
1383   "pipelineDescription": { # Preliminary field: The format of this data may change at any time.
1384       # A descriptive representation of the submitted pipeline as well as the executed
1385       # form. This data is provided by the Dataflow service for ease of visualizing
1386       # the pipeline and interpreting Dataflow-provided metrics.
1387       # A description of the user pipeline and stages through which it is executed.
1388       # Created by the Cloud Dataflow service. Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1389 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1390 { # Description of the type, names/ids, and input/outputs for a transform.
1391 "kind": "A String", # Type of transform.
1392 "name": "A String", # User provided name for this transform instance.
1393 "inputCollectionName": [ # User names for all collection inputs to this transform.
1394 "A String",
1395 ],
1396 "displayData": [ # Transform-specific display data.
1397 { # Data provided with a pipeline or transform to provide descriptive info.
1398             "shortStrValue": "A String", # A possible additional shorter value to display.
1399 # For example a java_class_name_value of com.mypackage.MyDoFn
1400 # will be stored with MyDoFn as the short_str_value and
1401 # com.mypackage.MyDoFn as the java_class_name value.
1402 # short_str_value can be displayed and java_class_name_value
1403 # will be displayed as a tooltip.
1404 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1405             "durationValue": "A String", # Contains value if the data is of duration type.
1406             "url": "A String", # An optional full URL.
1407 "floatValue": 3.14, # Contains value if the data is of float type.
1408 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1409 # language namespace (i.e. python module) which defines the display data.
1410 # This allows a dax monitoring system to specially handle the data
1411 # and perform custom rendering.
1412 "javaClassValue": "A String", # Contains value if the data is of java class type.
1413 "label": "A String", # An optional label to display in a dax UI for the element.
1414 "boolValue": True or False, # Contains value if the data is of a boolean type.
1415 "strValue": "A String", # Contains value if the data is of string type.
1416             "key": "A String", # The key identifying the display data.
1417                 # This is intended to be used as a label for the display data
1418                 # when viewed in a dax monitoring system.
1419             "int64Value": "A String", # Contains value if the data is of int64 type.
1420 },
1421 ],
1422 "outputCollectionName": [ # User names for all collection outputs to this transform.
1423 "A String",
1424 ],
1425 "id": "A String", # SDK generated id of this transform instance.
1426 },
1427 ],
1428 "displayData": [ # Pipeline level display data.
1429 { # Data provided with a pipeline or transform to provide descriptive info.
1430         "shortStrValue": "A String", # A possible additional shorter value to display.
1431 # For example a java_class_name_value of com.mypackage.MyDoFn
1432 # will be stored with MyDoFn as the short_str_value and
1433 # com.mypackage.MyDoFn as the java_class_name value.
1434 # short_str_value can be displayed and java_class_name_value
1435 # will be displayed as a tooltip.
1436 "timestampValue": "A String", # Contains value if the data is of timestamp type.
1437         "durationValue": "A String", # Contains value if the data is of duration type.
1438         "url": "A String", # An optional full URL.
1439 "floatValue": 3.14, # Contains value if the data is of float type.
1440 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1441 # language namespace (i.e. python module) which defines the display data.
1442 # This allows a dax monitoring system to specially handle the data
1443 # and perform custom rendering.
1444 "javaClassValue": "A String", # Contains value if the data is of java class type.
1445 "label": "A String", # An optional label to display in a dax UI for the element.
1446 "boolValue": True or False, # Contains value if the data is of a boolean type.
1447 "strValue": "A String", # Contains value if the data is of string type.
1448         "key": "A String", # The key identifying the display data.
1449             # This is intended to be used as a label for the display data
1450             # when viewed in a dax monitoring system.
1451         "int64Value": "A String", # Contains value if the data is of int64 type.
1452 },
1453 ],
1454 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1455 { # Description of the composing transforms, names/ids, and input/outputs of a
1456 # stage of execution. Some composing transforms and sources may have been
1457 # generated by the Dataflow service during execution planning.
1458 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1459 { # Description of an interstitial value between transforms in an execution
1460 # stage.
1461 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1462 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1463 # source is most closely associated.
1464 "name": "A String", # Dataflow service generated name for this source.
1465 },
1466 ],
1467         "kind": "A String", # Type of transform this stage is executing.
1468 "name": "A String", # Dataflow service generated name for this stage.
1469 "outputSource": [ # Output sources for this stage.
1470 { # Description of an input or output of an execution stage.
1471 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1472             "sizeBytes": "A String", # Size of the source, if measurable.
1473             "name": "A String", # Dataflow service generated name for this source.
1474             "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1475                 # source is most closely associated.
1476           },
1477 ],
1478 "inputSource": [ # Input sources for this stage.
1479 { # Description of an input or output of an execution stage.
1480 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1481             "sizeBytes": "A String", # Size of the source, if measurable.
1482             "name": "A String", # Dataflow service generated name for this source.
1483             "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1484                 # source is most closely associated.
1485           },
1486 ],
1487 "componentTransform": [ # Transforms that comprise this execution stage.
1488 { # Description of a transform executed as part of an execution stage.
1489 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1490 "originalTransform": "A String", # User name for the original user transform with which this transform is
1491 # most closely associated.
1492 "name": "A String", # Dataflow service generated name for this source.
1493 },
1494 ],
1495 "id": "A String", # Dataflow service generated id for this stage.
1496 },
1497 ],
1498 },
1499   "steps": [ # The top-level steps that constitute the entire job.
1500     { # Defines a particular step within a Cloud Dataflow job.
1501 #
1502 # A job consists of multiple steps, each of which performs some
1503 # specific operation as part of the overall job. Data is typically
1504 # passed from one step to another as part of the job.
1505 #
1506 # Here's an example of a sequence of steps which together implement a
1507 # Map-Reduce job:
1508 #
1509 # * Read a collection of data from some source, parsing the
1510 # collection's elements.
1511 #
1512 # * Validate the elements.
1513 #
1514 # * Apply a user-defined function to map each element to some value
1515 # and extract an element-specific key value.
1516 #
1517 # * Group elements with the same key into a single element with
1518 # that key, transforming a multiply-keyed collection into a
1519 # uniquely-keyed collection.
1520 #
1521 # * Write the elements out to some data sink.
1522 #
1523 # Note that the Cloud Dataflow service may be used to run many different
1524 # types of jobs, not just Map-Reduce.
1525 "kind": "A String", # The kind of step in the Cloud Dataflow job.
1526       "name": "A String", # The name that identifies the step. This must be unique for each
1527           # step with respect to all other steps in the Cloud Dataflow job.
1528       "properties": { # Named properties associated with the step. Each kind of
1529 # predefined step has its own required set of properties.
1530 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
1531         "a_key": "", # Properties of the object.
1532 },
1533 },
1534 ],
1535   "location": "A String", # The location that contains this job.
1536   "tempFiles": [ # A set of files the system should be aware of that are used
1537 # for temporary storage. These temporary files will be
1538 # removed on job completion.
1539 # No duplicates are allowed.
1540 # No file patterns are supported.
1541 #
1542 # The supported files are:
1543 #
1544 # Google Cloud Storage:
1545 #
1546 # storage.googleapis.com/{bucket}/{object}
1547 # bucket.storage.googleapis.com/{object}
1548     "A String",
1549   ],
1550   "type": "A String", # The type of Cloud Dataflow job.
1551 "id": "A String", # The unique ID of this job.
1552 #
1553 # This field is set by the Cloud Dataflow service when the Job is
1554 # created, and is immutable for the life of the job.
1555   "currentState": "A String", # The current state of the job.
1556       #
1557       # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1558 # specified.
1559 #
1560 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1561 # terminal state. After a job has reached a terminal state, no
1562 # further state updates may be made.
1563 #
1564 # This field may be mutated by the Cloud Dataflow service;
1565 # callers cannot mutate it.
1566   "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1567       # isn't contained in the submitted job.
1568     "stages": { # A mapping from each stage to the information about that stage.
1569       "a_key": { # Contains information about how a particular
1570 # google.dataflow.v1beta3.Step will be executed.
1571 "stepName": [ # The steps associated with the execution stage.
1572 # Note that stages may have several steps, and that a given step
1573 # might be run by more than one stage.
1574             "A String",
1575 ],
1576 },
1577 },
1578 },
1579 }</pre>
1580</div>
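<p>A minimal sketch of polling a job's state with <code>get</code>, under the same assumptions as the <code>create</code> example above; the IDs shown are placeholders.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

# Fetch the job; currentState is set by the service and may lag slightly.
job = service.projects().locations().jobs().get(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-03-01_00_00_00-1234567890123456789',  # placeholder job ID
).execute()
print(job.get('currentState'))
</pre>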
1581
1582<div class="method">
1583 <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1584 <pre>Request the job status.
1585
1586Args:
1587 projectId: string, A project id. (required)
1588 location: string, The location which contains the job specified by job_id. (required)
1589 jobId: string, The job to get messages for. (required)
1590  startTime: string, Return only metric data that has changed since this time.
1591Default is to return all information about all metrics for the job.
1592  x__xgafv: string, V1 error format.
1593    Allowed values
1594 1 - v1 error format
1595 2 - v2 error format
1596
1597Returns:
1598 An object of the form:
1599
1600     { # JobMetrics contains a collection of metrics describing the detailed progress
1601 # of a Dataflow job. Metrics correspond to user-defined and system-defined
1602 # metrics in the job.
1603 #
1604 # This resource captures only the most recent values of each metric;
1605 # time-series data can be queried for them (under the same metric names)
1606 # from Cloud Monitoring.
1607   "metrics": [ # All metrics for this job.
1608     { # Describes the state of a metric.
1609       "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1610 # This holds the count of the aggregated values and is used in combination
1611 # with mean_sum above to obtain the actual mean aggregate value.
1612 # The only possible value type is Long.
1613 "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
1614 # "Sum", "Max", "Min", "Mean", "Set", "And", and "Or".
1615 # The specified aggregation kind is case-insensitive.
1616 #
1617 # If omitted, this is not an aggregated value but instead
1618 # a single metric sample value.
1619 "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
1620 # possible value type is a list of Values whose type can be Long, Double,
1621 # or String, according to the metric's type. All Values in the list must
1622 # be of the same type.
1623 "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1624 # metric.
1625         "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
1626 # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1627         "name": "A String", # Worker-defined metric name.
1628         "context": { # Zero or more labeled fields which identify the part of the job this
1629 # metric is associated with, such as the name of a step or collection.
1630 #
1631 # For example, built-in counters associated with steps will have
1632 # context['step'] = <step-name>. Counters associated with PCollections
1633 # in the SDK will have context['pcollection'] = <pcollection-name>.
1634           "a_key": "A String",
1635 },
1636 },
1637       "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1638 # This holds the sum of the aggregated values and is used in combination
1639 # with mean_count below to obtain the actual mean aggregate value.
1640 # The only possible value types are Long and Double.
1641 "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1642 # value accumulated since the worker started working on this WorkItem.
1643 # By default this is false, indicating that this metric is reported
1644 # as a delta that is not associated with any WorkItem.
1645 "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1646 # reporting work progress; it will be filled in responses from the
1647 # metrics API.
1648 "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1649 # "And", and "Or". The possible value types are Long, Double, and Boolean.
1650 "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1651 # service.
1652     },
1653 ],
1654 "metricTime": "A String", # Timestamp as of which metric values are current.
1655 }</pre>
1656</div>
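<p>A sketch of reading job metrics with <code>getMetrics</code>, again under the hypothetical setup used above; <code>startTime</code> is optional and shown only to illustrate incremental polling.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')

metrics = service.projects().locations().jobs().getMetrics(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-03-01_00_00_00-1234567890123456789',  # placeholder
    startTime='2017-03-01T00:00:00Z',                 # only metrics updated since this time
).execute()

# Each metric carries a structured name plus one value field (scalar, set, ...).
for metric in metrics.get('metrics', []):
    print(metric['name']['name'], metric.get('scalar'))
</pre>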
1657
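<p>The <code>list</code> method documented below paginates its results; the sketch that follows drains every page with the generated <code>list_next</code> helper, under the same hypothetical setup as the earlier examples.</p>
<pre>
from googleapiclient.discovery import build

service = build('dataflow', 'v1b3')
jobs_api = service.projects().locations().jobs()

request = jobs_api.list(projectId='my-project', location='us-central1')  # placeholders
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['id'], job.get('currentState'))
    # list_next returns None once next_page_token is absent.
    request = jobs_api.list_next(request, response)
</pre>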
1658<div class="method">
1659 <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
1660  <pre>List the jobs of a project.
1661
1662Args:
1663  projectId: string, The project which owns the jobs. (required)
1664  location: string, The location that contains this job. (required)
1665 pageSize: integer, If there are many jobs, limit response to at most this many.
1666The actual number of jobs returned will be the lesser of max_responses
1667and an unspecified server-defined limit.
1668  x__xgafv: string, V1 error format.
1669    Allowed values
1670 1 - v1 error format
1671 2 - v2 error format
1672 pageToken: string, Set this to the 'next_page_token' field of a previous response
1673to request additional results in a long list.
1674  filter: string, The kind of filter to use.
1675  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1676
1677Returns:
1678 An object of the form:
1679
1680     { # Response to a request to list Cloud Dataflow jobs. This may be a partial
1681         # response, depending on the page size in the ListJobsRequest.
1682   "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1683   "jobs": [ # A subset of the requested job information.
1684     { # Defines a job to be run by the Cloud Dataflow service.
1685 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1686 # If this field is set, the service will ensure its uniqueness.
1687 # The request to create a job will fail if the service has knowledge of a
1688 # previously submitted job with the same client's ID and job name.
1689 # The caller may use this field to ensure idempotence of job
1690 # creation across retried attempts to create a job.
1691 # By default, the field is empty and, in that case, the service ignores it.
1692 "requestedState": "A String", # The job's requested state.
1693 #
1694 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1695 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
1696 # also be used to directly set a job's requested state to
1697 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1698 # job if it has not already reached a terminal state.
1699 "name": "A String", # The user-specified Cloud Dataflow job name.
1700 #
1701 # Only one Job with a given name may exist in a project at any
1702 # given time. If a caller attempts to create a Job with the same
1703 # name as an already-existing Job, the attempt returns the
1704 # existing Job.
1705 #
1706 # The name must match the regular expression
1707 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1708 "currentStateTime": "A String", # The timestamp associated with the current state.
1709 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1710 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1711 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1712 "labels": { # User-defined labels for this job.
1713 #
1714 # The labels map can contain no more than 64 entries. Entries of the labels
1715 # map are UTF8 strings that comply with the following restrictions:
1716 #
1717 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
1718 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1719 # * Both keys and values are additionally constrained to be <= 128 bytes in
1720 # size.
1721         "a_key": "A String",
1722       },
1723       "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1724           # corresponding name prefixes of the new job.
1725         "a_key": "A String",
1726       },
1727       "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1728           # Cloud Dataflow service.
1729       "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1730 "version": { # A structure describing which components and their versions of the service
1731 # are required in order to run the job.
1732           "a_key": "", # Properties of the object.
1733         },
1734         "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1735             # storage. The system will append the suffix "/temp-{JOBNAME}" to
1736 # this resource prefix, where {JOBNAME} is the value of the
1737 # job_name field. The resulting bucket and object prefix is used
1738 # as the prefix of the resources used to store temporary data
1739 # needed during the job execution. NOTE: This will override the
1740 # value in taskrunner_settings.
1741 # The supported resource type is:
1742 #
1743 # Google Cloud Storage:
1744 #
1745 # storage.googleapis.com/{bucket}/{object}
1746 # bucket.storage.googleapis.com/{object}
1747         "internalExperiments": { # Experimental settings.
1748           "a_key": "", # Properties of the object. Contains field @type with type URL.
1749         },
1750         "dataset": "A String", # The dataset for the current project where various workflow
1751 # related tables are stored.
1752 #
1753 # The supported resource type is:
1754 #
1755 # Google BigQuery:
1756 # bigquery.googleapis.com/{dataset}
1757         "experiments": [ # The list of experiments to enable.
1758 "A String",
1759 ],
1760 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1761         "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1762 # options are passed through the service and are used to recreate the
1763 # SDK pipeline options on the worker in a language agnostic and platform
1764 # independent way.
1765           "a_key": "", # Properties of the object.
1766 },
1767 "userAgent": { # A description of the process that generated the request.
1768 "a_key": "", # Properties of the object.
1769 },
1770         "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
1771 # unspecified, the service will attempt to choose a reasonable
1772 # default. This should be in the form of the API service name,
1773 # e.g. "compute.googleapis.com".
1774 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1775 # specified in order for the job to have workers.
1776 { # Describes one particular pool of Cloud Dataflow workers to be
1777 # instantiated by the Cloud Dataflow service in order to perform the
1778 # computations required by a job. Note that a workflow job may use
1779 # multiple pools, in order to match the various computational
1780 # requirements of the various stages of the job.
1781           "diskSourceImage": "A String", # Fully qualified source image for disks.
1782           "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1783 # using the standard Dataflow task runner. Users should ignore
1784 # this field.
1785 "workflowFileName": "A String", # The file to store the workflow in.
1786 "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
1787 # will not be uploaded.
1788 #
1789 # The supported resource type is:
1790 #
1791 # Google Cloud Storage:
1792 # storage.googleapis.com/{bucket}/{object}
1793 # bucket.storage.googleapis.com/{object}
1794             "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1795                 # taskrunner; e.g. "root".
1796             "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1797 "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1798 "vmId": "A String", # The ID string of the VM.
1799 "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1800 "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1801             "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1802 # access the Cloud Dataflow API.
1803 "A String",
1804 ],
1805             "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1806 "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1807 "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1808 # "shuffle/v1beta1".
1809 "workerId": "A String", # The ID of the worker running this pipeline.
1810 "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1811 #
1812 # When workers access Google Cloud APIs, they logically do so via
1813 # relative URLs. If this field is specified, it supplies the base
1814 # URL to use for resolving these relative URLs. The normative
1815 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1816 # Locators".
1817 #
1818 # If not specified, the default value is "http://www.googleapis.com/"
1819 "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1820 # "dataflow/v1b3/projects".
1821 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1822 # storage.
1823 #
1824 # The supported resource type is:
1825 #
1826 # Google Cloud Storage:
1827 #
1828 # storage.googleapis.com/{bucket}/{object}
1829 # bucket.storage.googleapis.com/{object}
1830 },
1831             "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1832 # taskrunner; e.g. "wheel".
1833 "languageHint": "A String", # The suggested backend language.
1834 "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1835 # console.
1836 "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1837 "logDir": "A String", # The directory on the VM to store logs.
1838             "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1839             "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1840 #
1841 # When workers access Google Cloud APIs, they logically do so via
1842 # relative URLs. If this field is specified, it supplies the base
1843 # URL to use for resolving these relative URLs. The normative
1844 # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1845 # Locators".
1846 #
1847 # If not specified, the default value is "http://www.googleapis.com/"
1848             "harnessCommand": "A String", # The command to launch the worker harness.
1849 "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1850 # temporary storage.
1851                 #
1852                 # The supported resource type is:
1853 #
1854 # Google Cloud Storage:
1855 # storage.googleapis.com/{bucket}/{object}
1856 # bucket.storage.googleapis.com/{object}
1857           },
1858           "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1859 # are supported.
1860 "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
1861 # service will attempt to choose a reasonable default.
1862 "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
1863 # the service will use the network "default".
1864 "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
1865 # will attempt to choose a reasonable default.
1866 "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
1867 # attempt to choose a reasonable default.
1868           "dataDisks": [ # Data disks that are used by a VM in this workflow.
1869             { # Describes the data disk used by a workflow job.
1870               "mountPoint": "A String", # Directory in a VM where disk is mounted.
1871               "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
1872 # attempt to choose a reasonable default.
1873 "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
1874 # must be a disk type appropriate to the project and zone in which
1875 # the workers will run. If unknown or unspecified, the service
1876 # will attempt to choose a reasonable default.
1877 #
1878 # For example, the standard persistent disk type is a resource name
1879 # typically ending in "pd-standard". If SSD persistent disks are
1880 # available, the resource name typically ends with "pd-ssd". The
1881                   # actual valid values are defined by the Google Compute Engine API,
1882 # not by the Cloud Dataflow API; consult the Google Compute Engine
1883 # documentation for more information about determining the set of
1884 # available disk types for a particular project and zone.
1885 #
1886 # Google Compute Engine Disk types are local to a particular
1887 # project in a particular zone, and so the resource name will
1888 # typically look something like this:
1889 #
1890 # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1891             },
1892           ],
1893           "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                  # `TEARDOWN_NEVER`.
                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                  # down.
                  #
                  # If the workers are not torn down by the service, they will
                  # continue to run and use Google Compute Engine VM resources in the
                  # user's project until they are explicitly terminated by the user.
                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                  # policy except for small, manually supervised test jobs.
                  #
                  # If unknown or unspecified, the service will attempt to choose a reasonable
                  # default.
              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                  # Compute Engine API.
              "ipConfiguration": "A String", # Configuration for VM IPs.
              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                  # service will choose a number of threads (according to the number of cores
                  # on the selected machine type for batch, or 1 by convention for streaming).
              "poolArgs": { # Extra arguments for this worker pool.
                "a_key": "", # Properties of the object. Contains field @type with type URL.
              },
              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                  # execute the job. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                  # harness, residing in Google Container Registry.
              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                  # the form "regions/REGION/subnetworks/SUBNETWORK".
              "packages": [ # Packages to be installed on workers.
                { # The packages that must be installed in order for a worker to run the
                    # steps of the Cloud Dataflow job that will be assigned to its worker
                    # pool.
                    #
                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                    # might use this to install jars containing the user's code and all of the
                    # various dependencies (libraries, data files, etc.) required in order
                    # for that code to run.
                  "location": "A String", # The resource to read the package from. The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #
                      #   storage.googleapis.com/{bucket}
                      #   bucket.storage.googleapis.com/
                  "name": "A String", # The name of the package.
                },
              ],
              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
                "algorithm": "A String", # The algorithm to use for autoscaling.
                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
              },
              "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                  # select a default set of packages which are useful to worker
                  # harnesses written in a particular language.
              "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                  # attempt to choose a reasonable default.
              "metadata": { # Metadata to set on the Google Compute Engine VMs.
                "a_key": "A String",
              },
            },
          ],
        },
        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
            # of the job it replaced.
            #
            # When sending a `CreateJobRequest`, you can update a job by specifying it
            # here. The job named here is stopped, and its intermediate state is
            # transferred to this job.
        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
            # form. This data is provided by the Dataflow service for ease of visualizing
            # the pipeline and interpreting Dataflow provided metrics.
            #
            # Preliminary field: The format of this data may change at any time.
            # A description of the user pipeline and stages through which it is executed.
            # Created by Cloud Dataflow service. Only retrieved with
            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
            { # Description of the type, names/ids, and input/outputs for a transform.
              "kind": "A String", # Type of transform.
              "name": "A String", # User provided name for this transform instance.
              "inputCollectionName": [ # User names for all collection inputs to this transform.
                "A String",
              ],
              "displayData": [ # Transform-specific display data.
                { # Data provided with a pipeline or transform to provide descriptive info.
                  "shortStrValue": "A String", # A possible additional shorter value to display.
                      # For example a java_class_name_value of com.mypackage.MyDoFn
                      # will be stored with MyDoFn as the short_str_value and
                      # com.mypackage.MyDoFn as the java_class_name value.
                      # short_str_value can be displayed and java_class_name_value
                      # will be displayed as a tooltip.
                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
                  "durationValue": "A String", # Contains value if the data is of duration type.
                  "url": "A String", # An optional full URL.
                  "floatValue": 3.14, # Contains value if the data is of float type.
                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                      # language namespace (i.e. python module) which defines the display data.
                      # This allows a dax monitoring system to specially handle the data
                      # and perform custom rendering.
                  "javaClassValue": "A String", # Contains value if the data is of java class type.
                  "label": "A String", # An optional label to display in a dax UI for the element.
                  "boolValue": True or False, # Contains value if the data is of a boolean type.
                  "strValue": "A String", # Contains value if the data is of string type.
                  "key": "A String", # The key identifying the display data.
                      # This is intended to be used as a label for the display data
                      # when viewed in a dax monitoring system.
                  "int64Value": "A String", # Contains value if the data is of int64 type.
                },
              ],
              "outputCollectionName": [ # User names for all collection outputs to this transform.
                "A String",
              ],
              "id": "A String", # SDK generated id of this transform instance.
            },
          ],
          "displayData": [ # Pipeline level display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
            { # Description of the composing transforms, names/ids, and input/outputs of a
                # stage of execution. Some composing transforms and sources may have been
                # generated by the Dataflow service during execution planning.
              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
                { # Description of an interstitial value between transforms in an execution
                    # stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "kind": "A String", # Type of transform this stage is executing.
              "name": "A String", # Dataflow service generated name for this stage.
              "outputSource": [ # Output sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "inputSource": [ # Input sources for this stage.
                { # Description of an input or output of an execution stage.
                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
                  "sizeBytes": "A String", # Size of the source, if measurable.
                  "name": "A String", # Dataflow service generated name for this source.
                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                      # source is most closely associated.
                },
              ],
              "componentTransform": [ # Transforms that comprise this execution stage.
                { # Description of a transform executed as part of an execution stage.
                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                  "originalTransform": "A String", # User name for the original user transform with which this transform is
                      # most closely associated.
                  "name": "A String", # Dataflow service generated name for this source.
                },
              ],
              "id": "A String", # Dataflow service generated id for this stage.
            },
          ],
        },
        "steps": [ # The top-level steps that constitute the entire job.
          { # Defines a particular step within a Cloud Dataflow job.
              #
              # A job consists of multiple steps, each of which performs some
              # specific operation as part of the overall job. Data is typically
              # passed from one step to another as part of the job.
              #
              # Here's an example of a sequence of steps which together implement a
              # Map-Reduce job:
              #
              #   * Read a collection of data from some source, parsing the
              #     collection's elements.
              #
              #   * Validate the elements.
              #
              #   * Apply a user-defined function to map each element to some value
              #     and extract an element-specific key value.
              #
              #   * Group elements with the same key into a single element with
              #     that key, transforming a multiply-keyed collection into a
              #     uniquely-keyed collection.
              #
              #   * Write the elements out to some data sink.
              #
              # Note that the Cloud Dataflow service may be used to run many different
              # types of jobs, not just Map-Reduce.
            "kind": "A String", # The kind of step in the Cloud Dataflow job.
            "name": "A String", # The name that identifies the step. This must be unique for each
                # step with respect to all other steps in the Cloud Dataflow job.
            "properties": { # Named properties associated with the step. Each kind of
                # predefined step has its own required set of properties.
                # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
              "a_key": "", # Properties of the object.
            },
          },
        ],
        "location": "A String", # The location that contains this job.
        "tempFiles": [ # A set of files the system should be aware of that are used
            # for temporary storage. These temporary files will be
            # removed on job completion.
            # No duplicates are allowed.
            # No file patterns are supported.
            #
            # The supported files are:
            #
            # Google Cloud Storage:
            #
            #   storage.googleapis.com/{bucket}/{object}
            #   bucket.storage.googleapis.com/{object}
          "A String",
        ],
        "type": "A String", # The type of Cloud Dataflow job.
        "id": "A String", # The unique ID of this job.
            #
            # This field is set by the Cloud Dataflow service when the Job is
            # created, and is immutable for the life of the job.
        "currentState": "A String", # The current state of the job.
            #
            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
            # specified.
            #
            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
            # terminal state. After a job has reached a terminal state, no
            # further state updates may be made.
            #
            # This field may be mutated by the Cloud Dataflow service;
            # callers cannot mutate it.
        "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
            # will be executed that isn't contained in the submitted job.
          "stages": { # A mapping from each stage to the information about that stage.
            "a_key": { # Contains information about how a particular
                # google.dataflow.v1beta3.Step will be executed.
              "stepName": [ # The steps associated with the execution stage.
                  # Note that stages may have several steps, and that a given step
                  # might be run by more than one stage.
                "A String",
              ],
            },
          },
        },
      },
    ],
    "failedLocation": [ # Zero or more messages describing locations that failed to respond.
      { # Indicates which location failed to respond to a request for data.
        "name": "A String", # The name of the failed location.
      },
    ],
  }</pre>
</div>

<div class="method">
    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
  <pre>Retrieves the next page of results.

Args:
  previous_request: The request for the previous page. (required)
  previous_response: The response from the request for the previous page. (required)

Returns:
  A request object that you can call 'execute()' on to request the next
  page. Returns None if there are no more items in the collection.
    </pre>
</div>

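<p>For example, here is a minimal sketch (not part of the generated reference
itself) of paging through all jobs with <code>list()</code> and
<code>list_next()</code>. It assumes an already-built <code>service</code>
discovery client, a hypothetical project and location, and that the list
response carries its jobs under the <code>jobs</code> key:</p>

<pre>
request = service.projects().locations().jobs().list(
    projectId='my-project', location='us-central1')
while request is not None:
  response = request.execute()
  for job in response.get('jobs', []):
    # Each job is a dict shaped like the Job object documented above.
    print(job['id'], job.get('currentState'))
  # list_next returns None once the collection is exhausted.
  request = service.projects().locations().jobs().list_next(
      previous_request=request, previous_response=response)
</pre>
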
<div class="method">
    <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
  <pre>Updates the state of an existing Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  location: string, The location that contains this job. (required)
  jobId: string, The job ID. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
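        #
        # Illustrative example (not from the API reference): to cancel a running
        # job, an update request body can contain just this field, e.g.
        #
        #   body = {"requestedState": "JOB_STATE_CANCELLED"}
        #   service.projects().locations().jobs().update(
        #       projectId="my-project", location="us-central1",
        #       jobId="2017-03-24_11_22_33-1234567890", body=body).execute()
        #
        # where `service` is a built discovery client and the project, location,
        # and job IDs above are hypothetical.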
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
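        #
        # For example (illustrative), `wordcount-batch-01` matches this
        # expression, while `WordCount` does not (uppercase is not allowed).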
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries. Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        #   size.
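        #
        # An illustrative entry (not from the reference): "environment": "staging".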
      "a_key": "A String",
    },
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
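        #
        # Illustrative example: {"oldTransformPrefix": "newTransformPrefix"},
        # where both prefixes are hypothetical names chosen for this sketch.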
      "a_key": "A String",
    },
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage. The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field. The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution. NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default. This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job. Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
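              #
              # Illustrative setting for this sketch: "teardownPolicy": "TEARDOWN_ALWAYS".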
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "algorithm": "A String", # The algorithm to use for autoscaling.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
        #
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
    },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
      },
    ],
    "location": "A String", # The location that contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job
        # will be executed that isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          #   size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
2735 # this resource prefix, where {JOBNAME} is the value of the
2736 # job_name field. The resulting bucket and object prefix is used
2737 # as the prefix of the resources used to store temporary data
2738 # needed during the job execution. NOTE: This will override the
2739 # value in taskrunner_settings.
2740 # The supported resource type is:
2741 #
2742 # Google Cloud Storage:
2743 #
2744 # storage.googleapis.com/{bucket}/{object}
2745 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002746 "internalExperiments": { # Experimental settings.
2747 "a_key": "", # Properties of the object. Contains field @type with type URL.
2748 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002749 "dataset": "A String", # The dataset for the current project where various workflow
2750 # related tables are stored.
2751 #
2752 # The supported resource type is:
2753 #
2754 # Google BigQuery:
2755 # bigquery.googleapis.com/{dataset}
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002756 "experiments": [ # The list of experiments to enable.
2757 "A String",
2758 ],
2759 "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002760 "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2761 # options are passed through the service and are used to recreate the
2762 # SDK pipeline options on the worker in a language agnostic and platform
2763 # independent way.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002764 "a_key": "", # Properties of the object.
2765 },
2766 "userAgent": { # A description of the process that generated the request.
2767 "a_key": "", # Properties of the object.
2768 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002769 "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
2770 # unspecified, the service will attempt to choose a reasonable
2771 # default. This should be in the form of the API service name,
2772 # e.g. "compute.googleapis.com".
2773 "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2774 # specified in order for the job to have workers.
2775 { # Describes one particular pool of Cloud Dataflow workers to be
2776 # instantiated by the Cloud Dataflow service in order to perform the
2777 # computations required by a job. Note that a workflow job may use
2778 # multiple pools, in order to match the various computational
2779 # requirements of the various stages of the job.
Jon Wayne Parrott692617a2017-01-06 09:58:29 -08002780 "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/".
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to the Google Compute Engine VM
                # serial console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/".
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  #   compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "algorithm": "A String", # The algorithm to use for autoscaling.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
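    # Illustrative sketch (editorial, not part of the generated schema): a
    # minimal environment one might submit when creating a job, using only
    # fields documented above. The concrete values are assumptions, not
    # service defaults.
    #
    #   "environment": {
    #     "workerPools": [{
    #       "kind": "harness",
    #       "numWorkers": 3,
    #       "machineType": "n1-standard-1",
    #       "zone": "us-central1-f",
    #     }],
    #   }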
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as its
        # executed form. This data is provided by the Dataflow service for ease of
        # visualizing the pipeline and interpreting Dataflow-provided metrics.
        #
        # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by the Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and inputs/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User-provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example, a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (e.g. a Python module) that defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK-generated ID of this transform instance.
        },
      ],
      "displayData": [ # Pipeline-level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example, a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (e.g. a Python module) that defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and inputs/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "sizeBytes": "A String", # Size of the source, if measurable.
              "name": "A String", # Dataflow service generated name for this source.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this transform.
            },
          ],
          "id": "A String", # Dataflow service generated ID for this stage.
        },
      ],
    },
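    # Note (editorial): as described above, pipelineDescription is only
    # populated when the job is fetched with JOB_VIEW_DESCRIPTION or
    # JOB_VIEW_ALL, e.g. (placeholder IDs assumed):
    #
    #   dataflow.projects().locations().jobs().get(
    #       projectId='my-project', location='us-central1',
    #       jobId='...', view='JOB_VIEW_DESCRIPTION').execute()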
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
      },
    ],
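    # Illustrative sketch (editorial; the step kinds and properties shown are
    # assumptions -- valid values are defined by the SDK that produced the job):
    #
    #   "steps": [
    #     {"kind": "ParallelRead", "name": "s1", "properties": {...}},
    #     {"kind": "ParallelDo", "name": "s2", "properties": {...}},
    #   ]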
    "location": "A String", # The location that contains this job.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "executionInfo": { # Deprecated. # Additional information about how a Cloud Dataflow job will be
        # executed that isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
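<p>Below is a minimal sketch (editorial, not generated documentation) of calling this method with the Google API Python client. The project ID, location, and job ID are placeholders, and Application Default Credentials are assumed; setting <code>requestedState</code> is shown only as one example of an update body.</p>
<pre>
# Illustrative sketch -- assumes google-api-python-client is installed and
# Application Default Credentials are available in the environment.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')

# Ask the service to cancel a running job by updating its requested state.
body = {'requestedState': 'JOB_STATE_CANCELLED'}

response = dataflow.projects().locations().jobs().update(
    projectId='my-project',                           # placeholder
    location='us-central1',                           # placeholder
    jobId='2017-01-01_00_00_00-1234567890123456789',  # placeholder
    body=body,
).execute()

# The response is a Job object of the form documented above.
print(response.get('currentState'))
</pre>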
</div>

</body></html>