<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
</p>
<p class="firstline">Returns the debug Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
</p>
<p class="firstline">Returns the messages Resource.</p>

<p class="toc_element">
  <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
</p>
<p class="firstline">Returns the workItems Resource.</p>

<p class="toc_element">
  <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
<p class="firstline">Creates a Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
<p class="toc_element">
  <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Request the job status.</p>
<p class="toc_element">
  <code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>
<p class="firstline">List the jobs of a project.</p>
<p class="toc_element">
  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
  <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
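<p>The <code>body</code> argument to <code>create()</code> is a plain Job dict following the schema documented under Method Details below. A minimal sketch of assembling one — the helper name, project, and bucket values are illustrative assumptions, not values from this page:</p>

```python
def make_job_body(job_name, temp_storage_prefix, num_workers=3):
    """Build a minimal Job resource dict for projects().jobs().create().

    Field names follow the Job schema documented on this page; which
    fields the service actually requires may vary by job type.
    """
    return {
        "name": job_name,  # must match `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
        "environment": {
            # Prefix for temporary storage; "/temp-{JOBNAME}" is appended.
            "tempStoragePrefix": temp_storage_prefix,
            "workerPools": [
                {
                    "kind": "harness",  # at least one "harness" pool is needed
                    "numWorkers": num_workers,
                }
            ],
        },
    }

# To submit the job (requires google-api-python-client and Application
# Default Credentials; "my-project" and the bucket are placeholders):
#
#   from googleapiclient.discovery import build
#   service = build("dataflow", "v1b3")
#   job = service.projects().jobs().create(
#       projectId="my-project",
#       body=make_job_body("example-job",
#                          "storage.googleapis.com/my-bucket/temp"),
#   ).execute()
```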
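<p><code>list()</code> returns one page of jobs at a time; <code>list_next(previous_request, previous_response)</code> builds the request for the following page and returns <code>None</code> when no pages remain. A sketch of draining all pages (the helper name is ours; it relies only on the <code>list</code>/<code>list_next</code> signatures shown above):</p>

```python
def iter_jobs(service, project_id, page_size=100):
    """Yield every job in a project, following pagination via list_next()."""
    jobs = service.projects().jobs()
    request = jobs.list(projectId=project_id, pageSize=page_size)
    while request is not None:
        response = request.execute()
        for job in response.get("jobs", []):
            yield job
        # list_next() returns None once the final page has been consumed.
        request = jobs.list_next(request, response)
```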
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
  <pre>Creates a Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    "location": "A String", # The location that contains this job.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries. Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        # size.
      "a_key": "A String",
    },
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage. The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field. The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution. NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          # storage.googleapis.com/{bucket}/{object}
          # bucket.storage.googleapis.com/{object}
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          # bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default. This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job. Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                # storage.googleapis.com/{bucket}/{object}
                # bucket.storage.googleapis.com/{object}
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  # storage.googleapis.com/{bucket}/{object}
                  # bucket.storage.googleapis.com/{object}
            },
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                # storage.googleapis.com/{bucket}/{object}
                # bucket.storage.googleapis.com/{object}
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  # storage.googleapis.com/{bucket}
                  # bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            "algorithm": "A String", # The algorithm to use for autoscaling.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
    },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          # * Read a collection of data from some source, parsing the
          # collection's elements.
          #
          # * Validate the elements.
          #
          # * Apply a user-defined function to map each element to some value
          # and extract an element-specific key value.
          #
          # * Group elements with the same key into a single element with
          # that key, transforming a multiply-keyed collection into a
          # uniquely-keyed collection.
          #
          # * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        # storage.googleapis.com/{bucket}/{object}
        # bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700594 "stages": { # A mapping from each stage to the information about that stage.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400595 "a_key": { # Contains information about how a particular
596 # google.dataflow.v1beta3.Step will be executed.
597 "stepName": [ # The steps associated with the execution stage.
598 # Note that stages may have several steps, and that a given step
599 # might be run by more than one stage.
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000600 "A String",
601 ],
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000602 },
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000603 },
604 },
Takashi Matsuo06694102015-09-11 13:55:40 -0700605 }
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000606
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400607 location: string, The location that contains this job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700608 x__xgafv: string, V1 error format.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400609 Allowed values
610 1 - v1 error format
611 2 - v2 error format
612 replaceJobId: string, Deprecated. This field is now in the Job message.
613 view: string, The level of information requested in response.
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000614
615Returns:
616 An object of the form:
617
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400618 { # Defines a job to be run by the Cloud Dataflow service.
619 "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
620 # If this field is set, the service will ensure its uniqueness.
621 # The request to create a job will fail if the service has knowledge of a
622 # previously submitted job with the same client's ID and job name.
623 # The caller may use this field to ensure idempotence of job
624 # creation across retried attempts to create a job.
625 # By default, the field is empty and, in that case, the service ignores it.
626 "requestedState": "A String", # The job's requested state.
627 #
628 # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
629 # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
630 # also be used to directly set a job's requested state to
631 # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
632 # job if it has not already reached a terminal state.
633 "name": "A String", # The user-specified Cloud Dataflow job name.
634 #
635 # Only one Job with a given name may exist in a project at any
636 # given time. If a caller attempts to create a Job with the same
637 # name as an already-existing Job, the attempt returns the
638 # existing Job.
639 #
640 # The name must match the regular expression
641 # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400642 "location": "A String", # The location that contains this job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400643 "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
644 # `JOB_STATE_UPDATED`), this field contains the ID of that job.
645 "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400646 "currentState": "A String", # The current state of the job.
647 #
648 # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
649 # specified.
650 #
651 # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
652 # terminal state. After a job has reached a terminal state, no
653 # further state updates may be made.
654 #
655 # This field may be mutated by the Cloud Dataflow service;
656 # callers cannot mutate it.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400657 "labels": { # User-defined labels for this job.
658 #
659 # The labels map can contain no more than 64 entries. Entries of the labels
660 # map are UTF8 strings that comply with the following restrictions:
661 #
662 # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
663 # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
664 # * Both keys and values are additionally constrained to be <= 128 bytes in
665 # size.
Jon Wayne Parrott7d5badb2016-08-16 12:44:29 -0700666 "a_key": "A String",
667 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400668 "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
669 # corresponding name prefixes of the new job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700670 "a_key": "A String",
Nathaniel Manista4f877e52015-06-15 16:44:50 +0000671 },
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400672 "id": "A String", # The unique ID of this job.
673 #
674 # This field is set by the Cloud Dataflow service when the Job is
675 # created, and is immutable for the life of the job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400676 "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
677 "version": { # A structure describing which components and their versions of the service
678 # are required in order to run the job.
Takashi Matsuo06694102015-09-11 13:55:40 -0700679 "a_key": "", # Properties of the object.
680 },
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400681 "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage. The system will append the suffix "/temp-{JOBNAME}" to
# this resource prefix, where {JOBNAME} is the value of the
# job_name field. The resulting bucket and object prefix is used
# as the prefix of the resources used to store temporary data
# needed during the job execution. NOTE: This will override the
# value in taskrunner_settings.
# The supported resource type is:
#
# Google Cloud Storage:
#
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"internalExperiments": { # Experimental settings.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
"dataset": "A String", # The dataset for the current project where various workflow
# related tables are stored.
#
# The supported resource type is:
#
# Google BigQuery:
# bigquery.googleapis.com/{dataset}
"experiments": [ # The list of experiments to enable.
"A String",
],
"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
# options are passed through the service and are used to recreate the
# SDK pipeline options on the worker in a language agnostic and platform
# independent way.
"a_key": "", # Properties of the object.
},
"userAgent": { # A description of the process that generated the request.
"a_key": "", # Properties of the object.
},
"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
# unspecified, the service will attempt to choose a reasonable
# default. This should be in the form of the API service name,
# e.g. "compute.googleapis.com".
"workerPools": [ # The worker pools. At least one "harness" worker pool must be
# specified in order for the job to have workers.
{ # Describes one particular pool of Cloud Dataflow workers to be
# instantiated by the Cloud Dataflow service in order to perform the
# computations required by a job. Note that a workflow job may use
# multiple pools, in order to match the various computational
# requirements of the various stages of the job.
"diskSourceImage": "A String", # Fully qualified source image for disks.
"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
# using the standard Dataflow task runner. Users should ignore
# this field.
"workflowFileName": "A String", # The file to store the workflow in.
"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
# will not be uploaded.
#
# The supported resource type is:
#
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"commandlinesFileName": "A String", # The file to store preprocessing commands in.
"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
"reportingEnabled": True or False, # Whether to send work progress updates to the service.
"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
# "shuffle/v1beta1".
"workerId": "A String", # The ID of the worker running this pipeline.
"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
# relative URLs. If this field is specified, it supplies the base
# URL to use for resolving these relative URLs. The normative
# algorithm used is defined by RFC 1808, "Relative Uniform Resource
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
# "dataflow/v1b3/projects".
"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
# storage.
#
# The supported resource type is:
#
# Google Cloud Storage:
#
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
},
"vmId": "A String", # The ID string of the VM.
"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
# access the Cloud Dataflow API.
"A String",
],
"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "root".
"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
#
# When workers access Google Cloud APIs, they logically do so via
# relative URLs. If this field is specified, it supplies the base
# URL to use for resolving these relative URLs. The normative
# algorithm used is defined by RFC 1808, "Relative Uniform Resource
# Locators".
#
# If not specified, the default value is "http://www.googleapis.com/"
"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
# taskrunner; e.g. "wheel".
"languageHint": "A String", # The suggested backend language.
"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
# console.
"streamingWorkerMainClass": "A String", # The streaming worker main class name.
"logDir": "A String", # The directory on the VM to store logs.
"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
"harnessCommand": "A String", # The command to launch the worker harness.
"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
# temporary storage.
#
# The supported resource type is:
#
# Google Cloud Storage:
# storage.googleapis.com/{bucket}/{object}
# bucket.storage.googleapis.com/{object}
"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
},
"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
# are supported.
"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
# service will attempt to choose a reasonable default.
"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
# the service will use the network "default".
"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
# will attempt to choose a reasonable default.
"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"dataDisks": [ # Data disks that are used by a VM in this workflow.
{ # Describes the data disk used by a workflow job.
"mountPoint": "A String", # Directory in a VM where disk is mounted.
"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
# must be a disk type appropriate to the project and zone in which
# the workers will run. If unknown or unspecified, the service
# will attempt to choose a reasonable default.
#
# For example, the standard persistent disk type is a resource name
# typically ending in "pd-standard". If SSD persistent disks are
# available, the resource name typically ends with "pd-ssd". The
# actual valid values are defined by the Google Compute Engine API,
# not by the Cloud Dataflow API; consult the Google Compute Engine
# documentation for more information about determining the set of
# available disk types for a particular project and zone.
#
# Google Compute Engine Disk types are local to a particular
# project in a particular zone, and so the resource name will
# typically look something like this:
#
# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
},
],
"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
# `TEARDOWN_NEVER`.
# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
# down.
#
# If the workers are not torn down by the service, they will
# continue to run and use Google Compute Engine VM resources in the
# user's project until they are explicitly terminated by the user.
# Because of this, Google recommends using the `TEARDOWN_ALWAYS`
# policy except for small, manually supervised test jobs.
#
# If unknown or unspecified, the service will attempt to choose a reasonable
# default.
"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
# Compute Engine API.
"ipConfiguration": "A String", # Configuration for VM IPs.
"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
# service will choose a number of threads (according to the number of cores
# on the selected machine type for batch, or 1 by convention for streaming).
"poolArgs": { # Extra arguments for this worker pool.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
# execute the job. If zero or unspecified, the service will
# attempt to choose a reasonable default.
"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
# harness, residing in Google Container Registry.
"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
# the form "regions/REGION/subnetworks/SUBNETWORK".
"packages": [ # Packages to be installed on workers.
{ # The packages that must be installed in order for a worker to run the
# steps of the Cloud Dataflow job that will be assigned to its worker
# pool.
#
# This is the mechanism by which the Cloud Dataflow SDK causes code to
# be loaded onto the workers. For example, the Cloud Dataflow Java SDK
# might use this to install jars containing the user's code and all of the
# various dependencies (libraries, data files, etc.) required in order
# for that code to run.
"location": "A String", # The resource to read the package from. The supported resource type is:
#
# Google Cloud Storage:
#
# storage.googleapis.com/{bucket}
# bucket.storage.googleapis.com/
"name": "A String", # The name of the package.
},
],
"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
"algorithm": "A String", # The algorithm to use for autoscaling.
},
"defaultPackageSet": "A String", # The default package set to install. This allows the service to
# select a default set of packages which are useful to worker
# harnesses written in a particular language.
"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
# attempt to choose a reasonable default.
"metadata": { # Metadata to set on the Google Compute Engine VMs.
"a_key": "A String",
},
},
],
},
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
# form. This data is provided by the Dataflow service for ease of visualizing
# the pipeline and interpreting Dataflow provided metrics.
912 "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
913 { # Description of the type, names/ids, and input/outputs for a transform.
914 "kind": "A String", # Type of transform.
915 "name": "A String", # User provided name for this transform instance.
916 "inputCollectionName": [ # User names for all collection inputs to this transform.
917 "A String",
918 ],
919 "displayData": [ # Transform-specific display data.
920 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400921 "shortStrValue": "A String", # A possible additional shorter value to display.
922 # For example a java_class_name_value of com.mypackage.MyDoFn
923 # will be stored with MyDoFn as the short_str_value and
924 # com.mypackage.MyDoFn as the java_class_name value.
925 # short_str_value can be displayed and java_class_name_value
926 # will be displayed as a tooltip.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400927 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400928 "url": "A String", # An optional full URL.
929 "floatValue": 3.14, # Contains value if the data is of float type.
930 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
931 # language namespace (i.e. python module) which defines the display data.
932 # This allows a dax monitoring system to specially handle the data
933 # and perform custom rendering.
934 "javaClassValue": "A String", # Contains value if the data is of java class type.
935 "label": "A String", # An optional label to display in a dax UI for the element.
936 "boolValue": True or False, # Contains value if the data is of a boolean type.
937 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400938 "key": "A String", # The key identifying the display data.
939 # This is intended to be used as a label for the display data
940 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400941 "int64Value": "A String", # Contains value if the data is of int64 type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400942 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400943 },
944 ],
945 "outputCollectionName": [ # User names for all collection outputs to this transform.
946 "A String",
947 ],
948 "id": "A String", # SDK generated id of this transform instance.
949 },
950 ],
951 "displayData": [ # Pipeline level display data.
952 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400953 "shortStrValue": "A String", # A possible additional shorter value to display.
954 # For example a java_class_name_value of com.mypackage.MyDoFn
955 # will be stored with MyDoFn as the short_str_value and
956 # com.mypackage.MyDoFn as the java_class_name value.
957 # short_str_value can be displayed and java_class_name_value
958 # will be displayed as a tooltip.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400959 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400960 "url": "A String", # An optional full URL.
961 "floatValue": 3.14, # Contains value if the data is of float type.
962 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
963 # language namespace (i.e. python module) which defines the display data.
964 # This allows a dax monitoring system to specially handle the data
965 # and perform custom rendering.
966 "javaClassValue": "A String", # Contains value if the data is of java class type.
967 "label": "A String", # An optional label to display in a dax UI for the element.
968 "boolValue": True or False, # Contains value if the data is of a boolean type.
969 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400970 "key": "A String", # The key identifying the display data.
971 # This is intended to be used as a label for the display data
972 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400973 "int64Value": "A String", # Contains value if the data is of int64 type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -0400974 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400975 },
976 ],
977 "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
978 { # Description of the composing transforms, names/ids, and input/outputs of a
979 # stage of execution. Some composing transforms and sources may have been
980 # generated by the Dataflow service during execution planning.
981 "componentSource": [ # Collections produced and consumed by component transforms of this stage.
982 { # Description of an interstitial value between transforms in an execution
983 # stage.
984 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
985 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
986 # source is most closely associated.
987 "name": "A String", # Dataflow service generated name for this source.
988 },
989 ],
990 "kind": "A String", # Type of tranform this stage is executing.
991 "name": "A String", # Dataflow service generated name for this stage.
992 "outputSource": [ # Output sources for this stage.
993 { # Description of an input or output of an execution stage.
994 "userName": "A String", # Human-readable name for this source; may be user or system generated.
995 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
996 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -0700997 "name": "A String", # Dataflow service generated name for this source.
998 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -0400999 },
1000 ],
1001 "inputSource": [ # Input sources for this stage.
1002 { # Description of an input or output of an execution stage.
1003 "userName": "A String", # Human-readable name for this source; may be user or system generated.
1004 "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1005 # source is most closely associated.
Thomas Coffee2f245372017-03-27 10:39:26 -07001006 "name": "A String", # Dataflow service generated name for this source.
1007 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001008 },
1009 ],
1010 "componentTransform": [ # Transforms that comprise this execution stage.
1011 { # Description of a transform executed as part of an execution stage.
1012 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1013 "originalTransform": "A String", # User name for the original user transform with which this transform is
1014 # most closely associated.
1015 "name": "A String", # Dataflow service generated name for this source.
1016 },
1017 ],
1018 "id": "A String", # Dataflow service generated id for this stage.
1019 },
1020 ],
1021 },
Takashi Matsuo06694102015-09-11 13:55:40 -07001022 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04001023 { # Defines a particular step within a Cloud Dataflow job.
1024 #
1025 # A job consists of multiple steps, each of which performs some
1026 # specific operation as part of the overall job. Data is typically
1027 # passed from one step to another as part of the job.
1028 #
1029 # Here's an example of a sequence of steps which together implement a
1030 # Map-Reduce job:
1031 #
1032 # * Read a collection of data from some source, parsing the
1033 # collection's elements.
1034 #
1035 # * Validate the elements.
1036 #
1037 # * Apply a user-defined function to map each element to some value
1038 # and extract an element-specific key value.
1039 #
1040 # * Group elements with the same key into a single element with
1041 # that key, transforming a multiply-keyed collection into a
1042 # uniquely-keyed collection.
1043 #
1044 # * Write the elements out to some data sink.
1045 #
1046 # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
</div>

<div class="method">
    <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
  <pre>Gets the state of the specified Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  jobId: string, The job ID. (required)
  location: string, The location that contains this job.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  view: string, The level of information requested in response.

Returns:
  An object of the form:

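The Job `name` field documented in this reference must match the regular expression `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`. As a minimal sketch (the helper name and sample values below are illustrative, not part of the API), candidate job names can be checked locally before submission:

```python
import re

# Pattern taken verbatim from the Job.name field documentation.
JOB_NAME_RE = re.compile(r'^[a-z]([-a-z0-9]{0,38}[a-z0-9])?$')

def is_valid_job_name(name):
    """True if `name` satisfies the documented Cloud Dataflow job-name rules."""
    return JOB_NAME_RE.match(name) is not None

print(is_valid_job_name("wordcount-2017"))  # True
print(is_valid_job_name("WordCount"))       # False: uppercase is not allowed
```

Validating the name client-side avoids a round trip that would otherwise fail at job-creation time.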
    { # Defines a job to be run by the Cloud Dataflow service.
    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
        # If this field is set, the service will ensure its uniqueness.
        # The request to create a job will fail if the service has knowledge of a
        # previously submitted job with the same client's ID and job name.
        # The caller may use this field to ensure idempotence of job
        # creation across retried attempts to create a job.
        # By default, the field is empty and, in that case, the service ignores it.
    "requestedState": "A String", # The job's requested state.
        #
        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
        # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
        # also be used to directly set a job's requested state to
        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
        # job if it has not already reached a terminal state.
    "name": "A String", # The user-specified Cloud Dataflow job name.
        #
        # Only one Job with a given name may exist in a project at any
        # given time. If a caller attempts to create a Job with the same
        # name as an already-existing Job, the attempt returns the
        # existing Job.
        #
        # The name must match the regular expression
        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
    "location": "A String", # The location that contains this job.
    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
    "currentState": "A String", # The current state of the job.
        #
        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
        # specified.
        #
        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
        # terminal state. After a job has reached a terminal state, no
        # further state updates may be made.
        #
        # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
    "labels": { # User-defined labels for this job.
        #
        # The labels map can contain no more than 64 entries. Entries of the labels
        # map are UTF8 strings that comply with the following restrictions:
        #
        # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
        # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
        # * Both keys and values are additionally constrained to be <= 128 bytes in
        # size.
      "a_key": "A String",
    },
    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
        # corresponding name prefixes of the new job.
      "a_key": "A String",
    },
    "id": "A String", # The unique ID of this job.
        #
        # This field is set by the Cloud Dataflow service when the Job is
        # created, and is immutable for the life of the job.
    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
      "version": { # A structure describing which components and their versions of the service
          # are required in order to run the job.
        "a_key": "", # Properties of the object.
      },
      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
          # storage. The system will append the suffix "/temp-{JOBNAME}" to
          # this resource prefix, where {JOBNAME} is the value of the
          # job_name field. The resulting bucket and object prefix is used
          # as the prefix of the resources used to store temporary data
          # needed during the job execution. NOTE: This will override the
          # value in taskrunner_settings.
          # The supported resource type is:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
      "internalExperiments": { # Experimental settings.
        "a_key": "", # Properties of the object. Contains field @type with type URL.
      },
      "dataset": "A String", # The dataset for the current project where various workflow
          # related tables are stored.
          #
          # The supported resource type is:
          #
          # Google BigQuery:
          #   bigquery.googleapis.com/{dataset}
      "experiments": [ # The list of experiments to enable.
        "A String",
      ],
      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
          # options are passed through the service and are used to recreate the
          # SDK pipeline options on the worker in a language agnostic and platform
          # independent way.
        "a_key": "", # Properties of the object.
      },
      "userAgent": { # A description of the process that generated the request.
        "a_key": "", # Properties of the object.
      },
      "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
          # unspecified, the service will attempt to choose a reasonable
          # default. This should be in the form of the API service name,
          # e.g. "compute.googleapis.com".
      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
          # specified in order for the job to have workers.
        { # Describes one particular pool of Cloud Dataflow workers to be
            # instantiated by the Cloud Dataflow service in order to perform the
            # computations required by a job. Note that a workflow job may use
            # multiple pools, in order to match the various computational
            # requirements of the various stages of the job.
          "diskSourceImage": "A String", # Fully qualified source image for disks.
          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
              # using the standard Dataflow task runner. Users should ignore
              # this field.
            "workflowFileName": "A String", # The file to store the workflow in.
            "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                # will not be uploaded.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                  # "shuffle/v1beta1".
              "workerId": "A String", # The ID of the worker running this pipeline.
              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                  # "dataflow/v1b3/projects".
              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                  # storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}/{object}
                  #   bucket.storage.googleapis.com/{object}
            },
            "vmId": "A String", # The ID string of the VM.
            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                # access the Cloud Dataflow API.
              "A String",
            ],
            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "root".
            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                # taskrunner; e.g. "wheel".
            "languageHint": "A String", # The suggested backend language.
            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                # console.
            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
            "logDir": "A String", # The directory on the VM to store logs.
            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
            "harnessCommand": "A String", # The command to launch the worker harness.
            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                # temporary storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
          },
          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
              # are supported.
          "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
              # service will attempt to choose a reasonable default.
          "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
              # the service will use the network "default".
          "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
              # will attempt to choose a reasonable default.
          "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "dataDisks": [ # Data disks that are used by a VM in this workflow.
            { # Describes the data disk used by a workflow job.
              "mountPoint": "A String", # Directory in a VM where disk is mounted.
              "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                  # attempt to choose a reasonable default.
              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                  # must be a disk type appropriate to the project and zone in which
                  # the workers will run. If unknown or unspecified, the service
                  # will attempt to choose a reasonable default.
                  #
                  # For example, the standard persistent disk type is a resource name
                  # typically ending in "pd-standard". If SSD persistent disks are
                  # available, the resource name typically ends with "pd-ssd". The
                  # actual valid values are defined by the Google Compute Engine API,
                  # not by the Cloud Dataflow API; consult the Google Compute Engine
                  # documentation for more information about determining the set of
                  # available disk types for a particular project and zone.
                  #
                  # Google Compute Engine Disk types are local to a particular
                  # project in a particular zone, and so the resource name will
                  # typically look something like this:
                  #
                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
            },
          ],
          "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
              # `TEARDOWN_NEVER`.
              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
              # down.
              #
              # If the workers are not torn down by the service, they will
              # continue to run and use Google Compute Engine VM resources in the
              # user's project until they are explicitly terminated by the user.
              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
              # policy except for small, manually supervised test jobs.
              #
              # If unknown or unspecified, the service will attempt to choose a reasonable
              # default.
          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
              # Compute Engine API.
          "ipConfiguration": "A String", # Configuration for VM IPs.
          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
              # service will choose a number of threads (according to the number of cores
              # on the selected machine type for batch, or 1 by convention for streaming).
          "poolArgs": { # Extra arguments for this worker pool.
            "a_key": "", # Properties of the object. Contains field @type with type URL.
          },
          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
              # execute the job. If zero or unspecified, the service will
              # attempt to choose a reasonable default.
          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
              # harness, residing in Google Container Registry.
          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
              # the form "regions/REGION/subnetworks/SUBNETWORK".
          "packages": [ # Packages to be installed on workers.
            { # The packages that must be installed in order for a worker to run the
                # steps of the Cloud Dataflow job that will be assigned to its worker
                # pool.
                #
                # This is the mechanism by which the Cloud Dataflow SDK causes code to
                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                # might use this to install jars containing the user's code and all of the
                # various dependencies (libraries, data files, etc.) required in order
                # for that code to run.
              "location": "A String", # The resource to read the package from. The supported resource type is:
                  #
                  # Google Cloud Storage:
                  #
                  #   storage.googleapis.com/{bucket}
                  #   bucket.storage.googleapis.com/
              "name": "A String", # The name of the package.
            },
          ],
          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
            "algorithm": "A String", # The algorithm to use for autoscaling.
          },
          "defaultPackageSet": "A String", # The default package set to install. This allows the service to
              # select a default set of packages which are useful to worker
              # harnesses written in a particular language.
          "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
              # attempt to choose a reasonable default.
          "metadata": { # Metadata to set on the Google Compute Engine VMs.
            "a_key": "A String",
          },
        },
      ],
    },
    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
        # A description of the user pipeline and stages through which it is executed.
        # Created by Cloud Dataflow service. Only retrieved with
        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
        # form. This data is provided by the Dataflow service for ease of visualizing
        # the pipeline and interpreting Dataflow provided metrics.
      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
        { # Description of the type, names/ids, and input/outputs for a transform.
          "kind": "A String", # Type of transform.
          "name": "A String", # User provided name for this transform instance.
          "inputCollectionName": [ # User names for all collection inputs to this transform.
            "A String",
          ],
          "displayData": [ # Transform-specific display data.
            { # Data provided with a pipeline or transform to provide descriptive info.
              "shortStrValue": "A String", # A possible additional shorter value to display.
                  # For example a java_class_name_value of com.mypackage.MyDoFn
                  # will be stored with MyDoFn as the short_str_value and
                  # com.mypackage.MyDoFn as the java_class_name value.
                  # short_str_value can be displayed and java_class_name_value
                  # will be displayed as a tooltip.
              "durationValue": "A String", # Contains value if the data is of duration type.
              "url": "A String", # An optional full URL.
              "floatValue": 3.14, # Contains value if the data is of float type.
              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                  # language namespace (i.e. python module) which defines the display data.
                  # This allows a dax monitoring system to specially handle the data
                  # and perform custom rendering.
              "javaClassValue": "A String", # Contains value if the data is of java class type.
              "label": "A String", # An optional label to display in a dax UI for the element.
              "boolValue": True or False, # Contains value if the data is of a boolean type.
              "strValue": "A String", # Contains value if the data is of string type.
              "key": "A String", # The key identifying the display data.
                  # This is intended to be used as a label for the display data
                  # when viewed in a dax monitoring system.
              "int64Value": "A String", # Contains value if the data is of int64 type.
              "timestampValue": "A String", # Contains value if the data is of timestamp type.
            },
          ],
          "outputCollectionName": [ # User names for all collection outputs to this transform.
            "A String",
          ],
          "id": "A String", # SDK generated id of this transform instance.
        },
      ],
      "displayData": [ # Pipeline level display data.
        { # Data provided with a pipeline or transform to provide descriptive info.
          "shortStrValue": "A String", # A possible additional shorter value to display.
              # For example a java_class_name_value of com.mypackage.MyDoFn
              # will be stored with MyDoFn as the short_str_value and
              # com.mypackage.MyDoFn as the java_class_name value.
              # short_str_value can be displayed and java_class_name_value
              # will be displayed as a tooltip.
          "durationValue": "A String", # Contains value if the data is of duration type.
          "url": "A String", # An optional full URL.
          "floatValue": 3.14, # Contains value if the data is of float type.
          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
              # language namespace (i.e. python module) which defines the display data.
              # This allows a dax monitoring system to specially handle the data
              # and perform custom rendering.
          "javaClassValue": "A String", # Contains value if the data is of java class type.
          "label": "A String", # An optional label to display in a dax UI for the element.
          "boolValue": True or False, # Contains value if the data is of a boolean type.
          "strValue": "A String", # Contains value if the data is of string type.
          "key": "A String", # The key identifying the display data.
              # This is intended to be used as a label for the display data
              # when viewed in a dax monitoring system.
          "int64Value": "A String", # Contains value if the data is of int64 type.
          "timestampValue": "A String", # Contains value if the data is of timestamp type.
        },
      ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
    },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          #   * Read a collection of data from some source, parsing the
          #     collection's elements.
          #
          #   * Validate the elements.
          #
          #   * Apply a user-defined function to map each element to some value
          #     and extract an element-specific key value.
          #
          #   * Group elements with the same key into a single element with
          #     that key, transforming a multiply-keyed collection into a
          #     uniquely-keyed collection.
          #
          #   * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
Nathaniel Manista4f877e52015-06-15 16:44:50 +00001609</div>

<div class="method">
    <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
  <pre>Request the job status.

Args:
  projectId: string, A project id. (required)
  jobId: string, The job to get messages for. (required)
  startTime: string, Return only metric data that has changed since this time.
Default is to return all information about all metrics for the job.
  location: string, The location which contains the job specified by job_id.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # JobMetrics contains a collection of metrics describing the detailed progress
        # of a Dataflow job. Metrics correspond to user-defined and system-defined
        # metrics in the job.
        #
        # This resource captures only the most recent values of each metric;
        # time-series data can be queried for them (under the same metric names)
        # from Cloud Monitoring.
      "metrics": [ # All metrics for this job.
        { # Describes the state of a metric.
          "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
              # This holds the count of the aggregated values and is used in combination
              # with mean_sum above to obtain the actual mean aggregate value.
              # The only possible value type is Long.
          "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
              # reporting work progress; it will be filled in responses from the
              # metrics API.
          "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
              # possible value type is a list of Values whose type can be Long, Double,
              # or String, according to the metric's type. All Values in the list must
              # be of the same type.
          "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
              # metric.
            "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
                # will be "dataflow" for metrics defined by the Dataflow service or SDK.
            "name": "A String", # Worker-defined metric name.
            "context": { # Zero or more labeled fields which identify the part of the job this
                # metric is associated with, such as the name of a step or collection.
                #
                # For example, built-in counters associated with steps will have
                # context['step'] = &lt;step-name&gt;. Counters associated with PCollections
                # in the SDK will have context['pcollection'] = &lt;pcollection-name&gt;.
              "a_key": "A String",
            },
          },
          "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
              # value accumulated since the worker started working on this WorkItem.
              # By default this is false, indicating that this metric is reported
              # as a delta that is not associated with any WorkItem.
          "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
              # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
              # The specified aggregation kind is case-insensitive.
              #
              # If omitted, this is not an aggregated value but instead
              # a single metric sample value.
          "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
              # "And", and "Or". The possible value types are Long, Double, and Boolean.
          "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
              # This holds the sum of the aggregated values and is used in combination
              # with mean_count below to obtain the actual mean aggregate value.
              # The only possible value types are Long and Double.
          "distribution": "", # A struct value describing properties of a distribution of numeric values.
          "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
              # service.
        },
      ],
      "metricTime": "A String", # Timestamp as of which metric values are current.
    }</pre>
</div>
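Illustrative sketch (not part of the generated reference): a JobMetrics response of the shape documented above can be post-processed client-side. The helper and the sample payload below are hypothetical; only the response shape is taken from this page.

```python
def scalar_metrics(job_metrics):
    """Map metric name -> worker-computed scalar value for each metric in a
    JobMetrics response dict that carries a "scalar" aggregate."""
    out = {}
    for metric in job_metrics.get("metrics", []):
        name = metric.get("name", {}).get("name")
        if name and "scalar" in metric:
            out[name] = metric["scalar"]
    return out

# Hypothetical payload shaped like the Returns section of getMetrics() above.
sample = {
    "metricTime": "2017-06-06T18:46:08Z",
    "metrics": [
        {"name": {"origin": "dataflow", "name": "ElementCount",
                  "context": {"step": "Read"}},
         "kind": "Sum", "cumulative": True, "scalar": 42},
        {"name": {"origin": "dataflow", "name": "MeanByteCount"},
         "kind": "Mean", "meanSum": 100, "meanCount": 4},
    ],
}
# scalar_metrics(sample) -> {"ElementCount": 42}; Mean metrics carry
# meanSum/meanCount instead of a scalar, so they are skipped here.
```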

<div class="method">
    <code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>
  <pre>List the jobs of a project.

Args:
  projectId: string, The project which owns the jobs. (required)
  pageSize: integer, If there are many jobs, limit response to at most this many.
The actual number of jobs returned will be the lesser of max_responses
and an unspecified server-defined limit.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
  pageToken: string, Set this to the 'next_page_token' field of a previous response
to request additional results in a long list.
  location: string, The location that contains this job.
  filter: string, The kind of filter to use.
  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
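Illustrative sketch (not part of the generated reference): filtering a ListJobsResponse of the shape documented under Returns. The helper and the sample payload are hypothetical; only the response shape comes from this page.

```python
def jobs_in_state(list_response, state):
    """Return the jobs in a ListJobsResponse dict whose currentState matches."""
    return [job for job in list_response.get("jobs", [])
            if job.get("currentState") == state]

# Hypothetical payload shaped like the Returns section of list().
sample_response = {
    "nextPageToken": "",  # empty: no further pages to fetch
    "jobs": [
        {"id": "job-1", "currentState": "JOB_STATE_RUNNING"},
        {"id": "job-2", "currentState": "JOB_STATE_DONE"},
    ],
}
# jobs_in_state(sample_response, "JOB_STATE_RUNNING") keeps only job-1.
```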

Returns:
  An object of the form:

    { # Response to a request to list Cloud Dataflow jobs. This may be a partial
        # response, depending on the page size in the ListJobsRequest.
      "nextPageToken": "A String", # Set if there may be more results than fit in this response.
      "failedLocation": [ # Zero or more messages describing locations that failed to respond.
        { # Indicates which location failed to respond to a request for data.
          "name": "A String", # The name of the failed location.
        },
      ],
      "jobs": [ # A subset of the requested job information.
        { # Defines a job to be run by the Cloud Dataflow service.
          "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
              # If this field is set, the service will ensure its uniqueness.
              # The request to create a job will fail if the service has knowledge of a
              # previously submitted job with the same client's ID and job name.
              # The caller may use this field to ensure idempotence of job
              # creation across retried attempts to create a job.
              # By default, the field is empty and, in that case, the service ignores it.
          "requestedState": "A String", # The job's requested state.
              #
              # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
              # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
              # also be used to directly set a job's requested state to
              # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
              # job if it has not already reached a terminal state.
          "name": "A String", # The user-specified Cloud Dataflow job name.
              #
              # Only one Job with a given name may exist in a project at any
              # given time. If a caller attempts to create a Job with the same
              # name as an already-existing Job, the attempt returns the
              # existing Job.
              #
              # The name must match the regular expression
              # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
          "location": "A String", # The location that contains this job.
          "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
              # `JOB_STATE_UPDATED`), this field contains the ID of that job.
          "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
          "currentState": "A String", # The current state of the job.
              #
              # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
              # specified.
              #
              # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
              # terminal state. After a job has reached a terminal state, no
              # further state updates may be made.
              #
              # This field may be mutated by the Cloud Dataflow service;
              # callers cannot mutate it.
          "labels": { # User-defined labels for this job.
              #
              # The labels map can contain no more than 64 entries. Entries of the labels
              # map are UTF8 strings that comply with the following restrictions:
              #
              # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
              # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
              # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
              # size.
            "a_key": "A String",
          },
          "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
              # corresponding name prefixes of the new job.
            "a_key": "A String",
          },
          "id": "A String", # The unique ID of this job.
              #
              # This field is set by the Cloud Dataflow service when the Job is
              # created, and is immutable for the life of the job.
          "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
            "version": { # A structure describing which components and their versions of the service
                # are required in order to run the job.
              "a_key": "", # Properties of the object.
            },
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage. The system will append the suffix "/temp-{JOBNAME}" to
                # this resource prefix, where {JOBNAME} is the value of the
                # job_name field. The resulting bucket and object prefix is used
                # as the prefix of the resources used to store temporary data
                # needed during the job execution. NOTE: This will override the
                # value in taskrunner_settings.
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
            "internalExperiments": { # Experimental settings.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "dataset": "A String", # The dataset for the current project where various workflow
                # related tables are stored.
                #
                # The supported resource type is:
                #
                # Google BigQuery:
                #   bigquery.googleapis.com/{dataset}
            "experiments": [ # The list of experiments to enable.
              "A String",
            ],
            "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
            "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
                # options are passed through the service and are used to recreate the
                # SDK pipeline options on the worker in a language agnostic and platform
                # independent way.
              "a_key": "", # Properties of the object.
            },
            "userAgent": { # A description of the process that generated the request.
              "a_key": "", # Properties of the object.
            },
            "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
                # unspecified, the service will attempt to choose a reasonable
                # default. This should be in the form of the API service name,
                # e.g. "compute.googleapis.com".
            "workerPools": [ # The worker pools. At least one "harness" worker pool must be
                # specified in order for the job to have workers.
              { # Describes one particular pool of Cloud Dataflow workers to be
                  # instantiated by the Cloud Dataflow service in order to perform the
                  # computations required by a job. Note that a workflow job may use
                  # multiple pools, in order to match the various computational
                  # requirements of the various stages of the job.
                "diskSourceImage": "A String", # Fully qualified source image for disks.
                "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                    # using the standard Dataflow task runner. Users should ignore
                    # this field.
                  "workflowFileName": "A String", # The file to store the workflow in.
                  "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                      # will not be uploaded.
                      #
                      # The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #   storage.googleapis.com/{bucket}/{object}
                      #   bucket.storage.googleapis.com/{object}
                  "commandlinesFileName": "A String", # The file to store preprocessing commands in.
                  "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                    "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                    "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                        # "shuffle/v1beta1".
                    "workerId": "A String", # The ID of the worker running this pipeline.
                    "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                        #
                        # When workers access Google Cloud APIs, they logically do so via
                        # relative URLs. If this field is specified, it supplies the base
                        # URL to use for resolving these relative URLs. The normative
                        # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                        # Locators".
                        #
                        # If not specified, the default value is "http://www.googleapis.com/"
                    "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                        # "dataflow/v1b3/projects".
                    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                        # storage.
                        #
                        # The supported resource type is:
                        #
                        # Google Cloud Storage:
                        #
                        #   storage.googleapis.com/{bucket}/{object}
                        #   bucket.storage.googleapis.com/{object}
                  },
                  "vmId": "A String", # The ID string of the VM.
                  "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
                  "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
                  "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                      # access the Cloud Dataflow API.
                    "A String",
                  ],
                  "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                      # taskrunner; e.g. "root".
                  "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                      #
                      # When workers access Google Cloud APIs, they logically do so via
                      # relative URLs. If this field is specified, it supplies the base
                      # URL to use for resolving these relative URLs. The normative
                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                      # Locators".
                      #
                      # If not specified, the default value is "http://www.googleapis.com/"
                  "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                      # taskrunner; e.g. "wheel".
                  "languageHint": "A String", # The suggested backend language.
                  "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                      # console.
                  "streamingWorkerMainClass": "A String", # The streaming worker main class name.
                  "logDir": "A String", # The directory on the VM to store logs.
                  "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
                  "harnessCommand": "A String", # The command to launch the worker harness.
                  "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                      # temporary storage.
                      #
                      # The supported resource type is:
                      #
                      # Google Cloud Storage:
                      #   storage.googleapis.com/{bucket}/{object}
                      #   bucket.storage.googleapis.com/{object}
                  "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
                },
                "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                    # are supported.
                "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
                    # service will attempt to choose a reasonable default.
                "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
                    # the service will use the network "default".
                "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
                    # will attempt to choose a reasonable default.
                "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "dataDisks": [ # Data disks that are used by a VM in this workflow.
                  { # Describes the data disk used by a workflow job.
                    "mountPoint": "A String", # Directory in a VM where disk is mounted.
                    "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                        # attempt to choose a reasonable default.
                    "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                        # must be a disk type appropriate to the project and zone in which
                        # the workers will run. If unknown or unspecified, the service
                        # will attempt to choose a reasonable default.
                        #
                        # For example, the standard persistent disk type is a resource name
                        # typically ending in "pd-standard". If SSD persistent disks are
                        # available, the resource name typically ends with "pd-ssd". The
                        # actual valid values are defined by the Google Compute Engine API,
                        # not by the Cloud Dataflow API; consult the Google Compute Engine
                        # documentation for more information about determining the set of
                        # available disk types for a particular project and zone.
                        #
                        # Google Compute Engine Disk types are local to a particular
                        # project in a particular zone, and so the resource name will
                        # typically look something like this:
                        #
                        # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
                  },
                ],
                "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                    # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                    # `TEARDOWN_NEVER`.
                    # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                    # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                    # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                    # down.
                    #
                    # If the workers are not torn down by the service, they will
                    # continue to run and use Google Compute Engine VM resources in the
                    # user's project until they are explicitly terminated by the user.
                    # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                    # policy except for small, manually supervised test jobs.
                    #
                    # If unknown or unspecified, the service will attempt to choose a reasonable
                    # default.
                "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                    # Compute Engine API.
                "ipConfiguration": "A String", # Configuration for VM IPs.
                "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                    # service will choose a number of threads (according to the number of cores
                    # on the selected machine type for batch, or 1 by convention for streaming).
                "poolArgs": { # Extra arguments for this worker pool.
                  "a_key": "", # Properties of the object. Contains field @type with type URL.
                },
                "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                    # execute the job. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                    # harness, residing in Google Container Registry.
                "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                    # the form "regions/REGION/subnetworks/SUBNETWORK".
                "packages": [ # Packages to be installed on workers.
                  { # The packages that must be installed in order for a worker to run the
                      # steps of the Cloud Dataflow job that will be assigned to its worker
                      # pool.
                      #
                      # This is the mechanism by which the Cloud Dataflow SDK causes code to
                      # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                      # might use this to install jars containing the user's code and all of the
                      # various dependencies (libraries, data files, etc.) required in order
                      # for that code to run.
                    "location": "A String", # The resource to read the package from. The supported resource type is:
                        #
                        # Google Cloud Storage:
                        #
                        #   storage.googleapis.com/{bucket}
                        #   bucket.storage.googleapis.com/
                    "name": "A String", # The name of the package.
                  },
                ],
                "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
                  "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
                  "algorithm": "A String", # The algorithm to use for autoscaling.
                },
                "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                    # select a default set of packages which are useful to worker
                    # harnesses written in a particular language.
                "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                    # attempt to choose a reasonable default.
                "metadata": { # Metadata to set on the Google Compute Engine VMs.
                  "a_key": "A String",
                },
              },
            ],
          },
          "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
              # A description of the user pipeline and stages through which it is executed.
              # Created by Cloud Dataflow service. Only retrieved with
              # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
              # form. This data is provided by the Dataflow service for ease of visualizing
              # the pipeline and interpreting Dataflow provided metrics.
            "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
              { # Description of the type, names/ids, and input/outputs for a transform.
                "kind": "A String", # Type of transform.
                "name": "A String", # User provided name for this transform instance.
                "inputCollectionName": [ # User names for all collection inputs to this transform.
                  "A String",
                ],
                "displayData": [ # Transform-specific display data.
                  { # Data provided with a pipeline or transform to provide descriptive info.
                    "shortStrValue": "A String", # A possible additional shorter value to display.
                        # For example a java_class_name_value of com.mypackage.MyDoFn
                        # will be stored with MyDoFn as the short_str_value and
                        # com.mypackage.MyDoFn as the java_class_name value.
                        # short_str_value can be displayed and java_class_name_value
                        # will be displayed as a tooltip.
                    "durationValue": "A String", # Contains value if the data is of duration type.
                    "url": "A String", # An optional full URL.
                    "floatValue": 3.14, # Contains value if the data is of float type.
                    "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                        # language namespace (i.e. python module) which defines the display data.
                        # This allows a dax monitoring system to specially handle the data
                        # and perform custom rendering.
                    "javaClassValue": "A String", # Contains value if the data is of java class type.
                    "label": "A String", # An optional label to display in a dax UI for the element.
                    "boolValue": True or False, # Contains value if the data is of a boolean type.
                    "strValue": "A String", # Contains value if the data is of string type.
                    "key": "A String", # The key identifying the display data.
                        # This is intended to be used as a label for the display data
                        # when viewed in a dax monitoring system.
                    "int64Value": "A String", # Contains value if the data is of int64 type.
                    "timestampValue": "A String", # Contains value if the data is of timestamp type.
                  },
                ],
                "outputCollectionName": [ # User names for all collection outputs to this transform.
                  "A String",
                ],
                "id": "A String", # SDK generated id of this transform instance.
              },
            ],
            "displayData": [ # Pipeline level display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (i.e. python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
              },
            ],
            "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
              { # Description of the composing transforms, names/ids, and input/outputs of a
                  # stage of execution. Some composing transforms and sources may have been
                  # generated by the Dataflow service during execution planning.
                "componentSource": [ # Collections produced and consumed by component transforms of this stage.
                  { # Description of an interstitial value between transforms in an execution
                      # stage.
                    "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                        # source is most closely associated.
                    "name": "A String", # Dataflow service generated name for this source.
                  },
                ],
                "kind": "A String", # Type of transform this stage is executing.
                "name": "A String", # Dataflow service generated name for this stage.
                "outputSource": [ # Output sources for this stage.
                  { # Description of an input or output of an execution stage.
                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                        # source is most closely associated.
                    "name": "A String", # Dataflow service generated name for this source.
                    "sizeBytes": "A String", # Size of the source, if measurable.
                  },
                ],
                "inputSource": [ # Input sources for this stage.
                  { # Description of an input or output of an execution stage.
                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                        # source is most closely associated.
                    "name": "A String", # Dataflow service generated name for this source.
2108 "sizeBytes": "A String", # Size of the source, if measurable.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002109 },
2110 ],
2111 "componentTransform": [ # Transforms that comprise this execution stage.
2112 { # Description of a transform executed as part of an execution stage.
2113 "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2114 "originalTransform": "A String", # User name for the original user transform with which this transform is
2115 # most closely associated.
2116 "name": "A String", # Dataflow service generated name for this source.
2117 },
2118 ],
2119 "id": "A String", # Dataflow service generated id for this stage.
2120 },
2121 ],
2122 },
      "steps": [ # The top-level steps that constitute the entire job.
        { # Defines a particular step within a Cloud Dataflow job.
            #
            # A job consists of multiple steps, each of which performs some
            # specific operation as part of the overall job. Data is typically
            # passed from one step to another as part of the job.
            #
            # Here's an example of a sequence of steps which together implement a
            # Map-Reduce job:
            #
            # * Read a collection of data from some source, parsing the
            #   collection's elements.
            #
            # * Validate the elements.
            #
            # * Apply a user-defined function to map each element to some value
            #   and extract an element-specific key value.
            #
            # * Group elements with the same key into a single element with
            #   that key, transforming a multiply-keyed collection into a
            #   uniquely-keyed collection.
            #
            # * Write the elements out to some data sink.
            #
            # Note that the Cloud Dataflow service may be used to run many different
            # types of jobs, not just Map-Reduce.
          "kind": "A String", # The kind of step in the Cloud Dataflow job.
          "properties": { # Named properties associated with the step. Each kind of
              # predefined step has its own required set of properties.
              # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
            "a_key": "", # Properties of the object.
          },
          "name": "A String", # The name that identifies the step. This must be unique for each
              # step with respect to all other steps in the Cloud Dataflow job.
        },
      ],
      "currentStateTime": "A String", # The timestamp associated with the current state.
      "tempFiles": [ # A set of files the system should be aware of that are used
          # for temporary storage. These temporary files will be
          # removed on job completion.
          # No duplicates are allowed.
          # No file patterns are supported.
          #
          # The supported files are:
          #
          # Google Cloud Storage:
          #
          #   storage.googleapis.com/{bucket}/{object}
          #   bucket.storage.googleapis.com/{object}
        "A String",
      ],
      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
        { # A message describing the state of a particular execution stage.
          "executionStageName": "A String", # The name of the execution stage.
          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
        },
      ],
      "type": "A String", # The type of Cloud Dataflow job.
      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
          # Cloud Dataflow service.
      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
          # of the job it replaced.
          #
          # When sending a `CreateJobRequest`, you can update a job by specifying it
          # here. The job named here is stopped, and its intermediate state is
          # transferred to this job.
      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
          # isn't contained in the submitted job.
        "stages": { # A mapping from each stage to the information about that stage.
          "a_key": { # Contains information about how a particular
              # google.dataflow.v1beta3.Step will be executed.
            "stepName": [ # The steps associated with the execution stage.
                # Note that stages may have several steps, and that a given step
                # might be run by more than one stage.
              "A String",
            ],
          },
        },
      },
    },
  ],
}</pre>
</div>

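As a quick illustration (not part of the generated reference), the `stageStates` entries of a decoded job response above can be collapsed into a simple name-to-state map; the helper name `stage_states` is invented for this sketch:

```python
# Hedged sketch: condense the "stageStates" list of a decoded Job
# response (shape as documented above) into {stage name: stage state}.
def stage_states(job):
    """Map each executionStageName to its executionStageState."""
    return {
        s["executionStageName"]: s["executionStageState"]
        for s in job.get("stageStates", [])
    }
```

The same dictionary-access pattern applies to any other field of the response, since `execute()` returns plain decoded JSON.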
<div class="method">
    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
  <pre>Retrieves the next page of results.

Args:
  previous_request: The request for the previous page. (required)
  previous_response: The response from the request for the previous page. (required)

Returns:
  A request object that you can call 'execute()' on to request the next
  page. Returns None if there are no more items in the collection.
    </pre>
</div>
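The `list_next` pattern above is typically driven in a loop. A minimal sketch, assuming `jobs_resource` is the `projects().jobs()` resource of a service built with `googleapiclient.discovery.build("dataflow", "v1b3")`; the helper name `iter_jobs` is invented here:

```python
# Hedged sketch of the list()/list_next() pagination pattern described
# above: keep executing requests until list_next() returns None.
def iter_jobs(jobs_resource, project_id):
    """Yield every job in the collection, across all result pages."""
    request = jobs_resource.list(projectId=project_id)
    while request is not None:
        response = request.execute()
        for job in response.get("jobs", []):
            yield job
        # list_next returns None when there are no more items.
        request = jobs_resource.list_next(request, response)
```

Because `list_next` rebuilds the request from the previous request/response pair, the caller never handles page tokens directly.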

<div class="method">
    <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
  <pre>Updates the state of an existing Cloud Dataflow job.

Args:
  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
  jobId: string, The job ID. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Defines a job to be run by the Cloud Dataflow service.
  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
      # If this field is set, the service will ensure its uniqueness.
      # The request to create a job will fail if the service has knowledge of a
      # previously submitted job with the same client's ID and job name.
      # The caller may use this field to ensure idempotence of job
      # creation across retried attempts to create a job.
      # By default, the field is empty and, in that case, the service ignores it.
  "requestedState": "A String", # The job's requested state.
      #
      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
      # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
      # also be used to directly set a job's requested state to
      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
      # job if it has not already reached a terminal state.
  "name": "A String", # The user-specified Cloud Dataflow job name.
      #
      # Only one Job with a given name may exist in a project at any
      # given time. If a caller attempts to create a Job with the same
      # name as an already-existing Job, the attempt returns the
      # existing Job.
      #
      # The name must match the regular expression
      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  "location": "A String", # The location that contains this job.
  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
  "currentState": "A String", # The current state of the job.
      #
      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
      # specified.
      #
      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
      # terminal state. After a job has reached a terminal state, no
      # further state updates may be made.
      #
      # This field may be mutated by the Cloud Dataflow service;
      # callers cannot mutate it.
  "labels": { # User-defined labels for this job.
      #
      # The labels map can contain no more than 64 entries. Entries of the labels
      # map are UTF8 strings that comply with the following restrictions:
      #
      # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
      # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
      # * Both keys and values are additionally constrained to be <= 128 bytes in
      #   size.
    "a_key": "A String",
  },
  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
      # corresponding name prefixes of the new job.
    "a_key": "A String",
  },
  "id": "A String", # The unique ID of this job.
      #
      # This field is set by the Cloud Dataflow service when the Job is
      # created, and is immutable for the life of the job.
  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
    "version": { # A structure describing which components and their versions of the service
        # are required in order to run the job.
      "a_key": "", # Properties of the object.
    },
    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
        # storage. The system will append the suffix "/temp-{JOBNAME}" to
        # this resource prefix, where {JOBNAME} is the value of the
        # job_name field. The resulting bucket and object prefix is used
        # as the prefix of the resources used to store temporary data
        # needed during the job execution. NOTE: This will override the
        # value in taskrunner_settings.
        # The supported resource type is:
        #
        # Google Cloud Storage:
        #
        #   storage.googleapis.com/{bucket}/{object}
        #   bucket.storage.googleapis.com/{object}
    "internalExperiments": { # Experimental settings.
      "a_key": "", # Properties of the object. Contains field @type with type URL.
    },
    "dataset": "A String", # The dataset for the current project where various workflow
        # related tables are stored.
        #
        # The supported resource type is:
        #
        # Google BigQuery:
        #   bigquery.googleapis.com/{dataset}
    "experiments": [ # The list of experiments to enable.
      "A String",
    ],
    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
        # options are passed through the service and are used to recreate the
        # SDK pipeline options on the worker in a language agnostic and platform
        # independent way.
      "a_key": "", # Properties of the object.
    },
    "userAgent": { # A description of the process that generated the request.
      "a_key": "", # Properties of the object.
    },
    "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
        # unspecified, the service will attempt to choose a reasonable
        # default. This should be in the form of the API service name,
        # e.g. "compute.googleapis.com".
    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
        # specified in order for the job to have workers.
      { # Describes one particular pool of Cloud Dataflow workers to be
          # instantiated by the Cloud Dataflow service in order to perform the
          # computations required by a job. Note that a workflow job may use
          # multiple pools, in order to match the various computational
          # requirements of the various stages of the job.
        "diskSourceImage": "A String", # Fully qualified source image for disks.
        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
            # using the standard Dataflow task runner. Users should ignore
            # this field.
          "workflowFileName": "A String", # The file to store the workflow in.
          "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
              # will not be uploaded.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                # "shuffle/v1beta1".
            "workerId": "A String", # The ID of the worker running this pipeline.
            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                #
                # When workers access Google Cloud APIs, they logically do so via
                # relative URLs. If this field is specified, it supplies the base
                # URL to use for resolving these relative URLs. The normative
                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                # Locators".
                #
                # If not specified, the default value is "http://www.googleapis.com/"
            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                # "dataflow/v1b3/projects".
            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                # storage.
                #
                # The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}/{object}
                #   bucket.storage.googleapis.com/{object}
          },
          "vmId": "A String", # The ID string of the VM.
          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
              # access the Cloud Dataflow API.
            "A String",
          ],
          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "root".
          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
              #
              # When workers access Google Cloud APIs, they logically do so via
              # relative URLs. If this field is specified, it supplies the base
              # URL to use for resolving these relative URLs. The normative
              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
              # Locators".
              #
              # If not specified, the default value is "http://www.googleapis.com/"
          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
              # taskrunner; e.g. "wheel".
          "languageHint": "A String", # The suggested backend language.
          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
              # console.
          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
          "logDir": "A String", # The directory on the VM to store logs.
          "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
          "harnessCommand": "A String", # The command to launch the worker harness.
          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
              # temporary storage.
              #
              # The supported resource type is:
              #
              # Google Cloud Storage:
              #   storage.googleapis.com/{bucket}/{object}
              #   bucket.storage.googleapis.com/{object}
          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
        },
        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
            # are supported.
        "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
            # service will attempt to choose a reasonable default.
        "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
            # the service will use the network "default".
        "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
            # will attempt to choose a reasonable default.
        "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "dataDisks": [ # Data disks that are used by a VM in this workflow.
          { # Describes the data disk used by a workflow job.
            "mountPoint": "A String", # Directory in a VM where disk is mounted.
            "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                # must be a disk type appropriate to the project and zone in which
                # the workers will run. If unknown or unspecified, the service
                # will attempt to choose a reasonable default.
                #
                # For example, the standard persistent disk type is a resource name
                # typically ending in "pd-standard". If SSD persistent disks are
                # available, the resource name typically ends with "pd-ssd". The
                # actual valid values are defined by the Google Compute Engine API,
                # not by the Cloud Dataflow API; consult the Google Compute Engine
                # documentation for more information about determining the set of
                # available disk types for a particular project and zone.
                #
                # Google Compute Engine Disk types are local to a particular
                # project in a particular zone, and so the resource name will
                # typically look something like this:
                #
                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
          },
        ],
        "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
            # `TEARDOWN_NEVER`.
            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
            # down.
            #
            # If the workers are not torn down by the service, they will
            # continue to run and use Google Compute Engine VM resources in the
            # user's project until they are explicitly terminated by the user.
            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
            # policy except for small, manually supervised test jobs.
            #
            # If unknown or unspecified, the service will attempt to choose a reasonable
            # default.
        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
            # Compute Engine API.
        "ipConfiguration": "A String", # Configuration for VM IPs.
        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
            # service will choose a number of threads (according to the number of cores
            # on the selected machine type for batch, or 1 by convention for streaming).
        "poolArgs": { # Extra arguments for this worker pool.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
            # execute the job. If zero or unspecified, the service will
            # attempt to choose a reasonable default.
        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
            # harness, residing in Google Container Registry.
        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
            # the form "regions/REGION/subnetworks/SUBNETWORK".
        "packages": [ # Packages to be installed on workers.
          { # The packages that must be installed in order for a worker to run the
              # steps of the Cloud Dataflow job that will be assigned to its worker
              # pool.
              #
              # This is the mechanism by which the Cloud Dataflow SDK causes code to
              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
              # might use this to install jars containing the user's code and all of the
              # various dependencies (libraries, data files, etc.) required in order
              # for that code to run.
            "location": "A String", # The resource to read the package from. The supported resource type is:
                #
                # Google Cloud Storage:
                #
                #   storage.googleapis.com/{bucket}
                #   bucket.storage.googleapis.com/
            "name": "A String", # The name of the package.
          },
        ],
        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
          "algorithm": "A String", # The algorithm to use for autoscaling.
        },
        "defaultPackageSet": "A String", # The default package set to install. This allows the service to
            # select a default set of packages which are useful to worker
            # harnesses written in a particular language.
        "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
            # attempt to choose a reasonable default.
        "metadata": { # Metadata to set on the Google Compute Engine VMs.
          "a_key": "A String",
        },
      },
    ],
  },
  "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
      # form. This data is provided by the Dataflow service for ease of visualizing
      # the pipeline and interpreting Dataflow provided metrics.
      # A description of the user pipeline and stages through which it is executed.
      # Created by Cloud Dataflow service. Only retrieved with
      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
      { # Description of the type, names/ids, and input/outputs for a transform.
        "kind": "A String", # Type of transform.
        "name": "A String", # User provided name for this transform instance.
        "inputCollectionName": [ # User names for all collection inputs to this transform.
          "A String",
        ],
        "displayData": [ # Transform-specific display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "outputCollectionName": [ # User names for all collection outputs to this transform.
          "A String",
        ],
        "id": "A String", # SDK generated id of this transform instance.
      },
    ],
2566 "displayData": [ # Pipeline level display data.
2567 { # Data provided with a pipeline or transform to provide descriptive info.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002568 "shortStrValue": "A String", # A possible additional shorter value to display.
2569 # For example a java_class_name_value of com.mypackage.MyDoFn
2570 # will be stored with MyDoFn as the short_str_value and
2571 # com.mypackage.MyDoFn as the java_class_name value.
2572 # short_str_value can be displayed and java_class_name_value
2573 # will be displayed as a tooltip.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04002574 "durationValue": "A String", # Contains value if the data is of duration type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002575 "url": "A String", # An optional full URL.
2576 "floatValue": 3.14, # Contains value if the data is of float type.
2577 "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2578 # language namespace (i.e. python module) which defines the display data.
2579 # This allows a dax monitoring system to specially handle the data
2580 # and perform custom rendering.
2581 "javaClassValue": "A String", # Contains value if the data is of java class type.
2582 "label": "A String", # An optional label to display in a dax UI for the element.
2583 "boolValue": True or False, # Contains value if the data is of a boolean type.
2584 "strValue": "A String", # Contains value if the data is of string type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04002585 "key": "A String", # The key identifying the display data.
2586 # This is intended to be used as a label for the display data
2587 # when viewed in a dax monitoring system.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002588 "int64Value": "A String", # Contains value if the data is of int64 type.
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04002589 "timestampValue": "A String", # Contains value if the data is of timestamp type.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04002590 },
2591 ],
      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
        { # Description of the composing transforms, names/ids, and input/outputs of a
            # stage of execution. Some composing transforms and sources may have been
            # generated by the Dataflow service during execution planning.
          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
            { # Description of an interstitial value between transforms in an execution
                # stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "kind": "A String", # Type of transform this stage is executing.
          "name": "A String", # Dataflow service generated name for this stage.
          "outputSource": [ # Output sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "inputSource": [ # Input sources for this stage.
            { # Description of an input or output of an execution stage.
              "userName": "A String", # Human-readable name for this source; may be user or system generated.
              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                  # source is most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
              "sizeBytes": "A String", # Size of the source, if measurable.
            },
          ],
          "componentTransform": [ # Transforms that comprise this execution stage.
            { # Description of a transform executed as part of an execution stage.
              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
              "originalTransform": "A String", # User name for the original user transform with which this transform is
                  # most closely associated.
              "name": "A String", # Dataflow service generated name for this source.
            },
          ],
          "id": "A String", # Dataflow service generated id for this stage.
        },
      ],
    },
    "steps": [ # The top-level steps that constitute the entire job.
      { # Defines a particular step within a Cloud Dataflow job.
          #
          # A job consists of multiple steps, each of which performs some
          # specific operation as part of the overall job. Data is typically
          # passed from one step to another as part of the job.
          #
          # Here's an example of a sequence of steps which together implement a
          # Map-Reduce job:
          #
          # * Read a collection of data from some source, parsing the
          # collection's elements.
          #
          # * Validate the elements.
          #
          # * Apply a user-defined function to map each element to some value
          # and extract an element-specific key value.
          #
          # * Group elements with the same key into a single element with
          # that key, transforming a multiply-keyed collection into a
          # uniquely-keyed collection.
          #
          # * Write the elements out to some data sink.
          #
          # Note that the Cloud Dataflow service may be used to run many different
          # types of jobs, not just Map-Reduce.
        "kind": "A String", # The kind of step in the Cloud Dataflow job.
        "properties": { # Named properties associated with the step. Each kind of
            # predefined step has its own required set of properties.
            # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
          "a_key": "", # Properties of the object.
        },
        "name": "A String", # The name that identifies the step. This must be unique for each
            # step with respect to all other steps in the Cloud Dataflow job.
      },
    ],
    "currentStateTime": "A String", # The timestamp associated with the current state.
    "tempFiles": [ # A set of files the system should be aware of that are used
        # for temporary storage. These temporary files will be
        # removed on job completion.
        # No duplicates are allowed.
        # No file patterns are supported.
        #
        # The supported files are:
        #
        # Google Cloud Storage:
        #
        # storage.googleapis.com/{bucket}/{object}
        # bucket.storage.googleapis.com/{object}
      "A String",
    ],
    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
        # callers cannot mutate it.
      { # A message describing the state of a particular execution stage.
        "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }

  location: string, The location that contains this job.
  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format
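As an illustrative sketch (not part of the generated reference): the Job `name` field documented below must match the regular expression `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`, and this can be checked client-side before calling the API. The helper name below is hypothetical.

```python
import re

# Pattern documented for the Job "name" field: a lowercase letter first,
# dashes/letters/digits in the middle, a letter or digit last, and at
# most 40 characters in total.
_JOB_NAME_RE = re.compile(r"[a-z]([-a-z0-9]{0,38}[a-z0-9])?")

def is_valid_job_name(name):
    # fullmatch: the entire string must satisfy the pattern.
    return _JOB_NAME_RE.fullmatch(name) is not None
```

For example, "wordcount-2017" passes, while "WordCount" (uppercase) or a 41-character name does not.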

Returns:
  An object of the form:

    { # Defines a job to be run by the Cloud Dataflow service.
      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
          # If this field is set, the service will ensure its uniqueness.
          # The request to create a job will fail if the service has knowledge of a
          # previously submitted job with the same client's ID and job name.
          # The caller may use this field to ensure idempotence of job
          # creation across retried attempts to create a job.
          # By default, the field is empty and, in that case, the service ignores it.
      "requestedState": "A String", # The job's requested state.
          #
          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
          # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
          # also be used to directly set a job's requested state to
          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
          # job if it has not already reached a terminal state.
      "name": "A String", # The user-specified Cloud Dataflow job name.
          #
          # Only one Job with a given name may exist in a project at any
          # given time. If a caller attempts to create a Job with the same
          # name as an already-existing Job, the attempt returns the
          # existing Job.
          #
          # The name must match the regular expression
          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
      "location": "A String", # The location that contains this job.
      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
      "currentState": "A String", # The current state of the job.
          #
          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
          # specified.
          #
          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
          # terminal state. After a job has reached a terminal state, no
          # further state updates may be made.
          #
          # This field may be mutated by the Cloud Dataflow service;
          # callers cannot mutate it.
      "labels": { # User-defined labels for this job.
          #
          # The labels map can contain no more than 64 entries. Entries of the labels
          # map are UTF8 strings that comply with the following restrictions:
          #
          # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}
          # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
          # * Both keys and values are additionally constrained to be <= 128 bytes in
          # size.
        "a_key": "A String",
      },
      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
          # corresponding name prefixes of the new job.
        "a_key": "A String",
      },
      "id": "A String", # The unique ID of this job.
          #
          # This field is set by the Cloud Dataflow service when the Job is
          # created, and is immutable for the life of the job.
      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
        "version": { # A structure describing which components and their versions of the service
            # are required in order to run the job.
          "a_key": "", # Properties of the object.
        },
        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
            # storage. The system will append the suffix "/temp-{JOBNAME}" to
            # this resource prefix, where {JOBNAME} is the value of the
            # job_name field. The resulting bucket and object prefix is used
            # as the prefix of the resources used to store temporary data
            # needed during the job execution. NOTE: This will override the
            # value in taskrunner_settings.
            # The supported resource type is:
            #
            # Google Cloud Storage:
            #
            # storage.googleapis.com/{bucket}/{object}
            # bucket.storage.googleapis.com/{object}
        "internalExperiments": { # Experimental settings.
          "a_key": "", # Properties of the object. Contains field @type with type URL.
        },
        "dataset": "A String", # The dataset for the current project where various workflow
            # related tables are stored.
            #
            # The supported resource type is:
            #
            # Google BigQuery:
            # bigquery.googleapis.com/{dataset}
        "experiments": [ # The list of experiments to enable.
          "A String",
        ],
        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
            # options are passed through the service and are used to recreate the
            # SDK pipeline options on the worker in a language agnostic and platform
            # independent way.
          "a_key": "", # Properties of the object.
        },
        "userAgent": { # A description of the process that generated the request.
          "a_key": "", # Properties of the object.
        },
        "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or
            # unspecified, the service will attempt to choose a reasonable
            # default. This should be in the form of the API service name,
            # e.g. "compute.googleapis.com".
        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
            # specified in order for the job to have workers.
          { # Describes one particular pool of Cloud Dataflow workers to be
              # instantiated by the Cloud Dataflow service in order to perform the
              # computations required by a job. Note that a workflow job may use
              # multiple pools, in order to match the various computational
              # requirements of the various stages of the job.
            "diskSourceImage": "A String", # Fully qualified source image for disks.
            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
                # using the standard Dataflow task runner. Users should ignore
                # this field.
              "workflowFileName": "A String", # The file to store the workflow in.
              "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
                  # will not be uploaded.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  # storage.googleapis.com/{bucket}/{object}
                  # bucket.storage.googleapis.com/{object}
              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
                    # "shuffle/v1beta1".
                "workerId": "A String", # The ID of the worker running this pipeline.
                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
                    #
                    # When workers access Google Cloud APIs, they logically do so via
                    # relative URLs. If this field is specified, it supplies the base
                    # URL to use for resolving these relative URLs. The normative
                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                    # Locators".
                    #
                    # If not specified, the default value is "http://www.googleapis.com/"
                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
                    # "dataflow/v1b3/projects".
                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
                    # storage.
                    #
                    # The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    # storage.googleapis.com/{bucket}/{object}
                    # bucket.storage.googleapis.com/{object}
              },
              "vmId": "A String", # The ID string of the VM.
              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
                  # access the Cloud Dataflow API.
                "A String",
              ],
              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "root".
              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
                  #
                  # When workers access Google Cloud APIs, they logically do so via
                  # relative URLs. If this field is specified, it supplies the base
                  # URL to use for resolving these relative URLs. The normative
                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
                  # Locators".
                  #
                  # If not specified, the default value is "http://www.googleapis.com/"
              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
                  # taskrunner; e.g. "wheel".
              "languageHint": "A String", # The suggested backend language.
              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
                  # console.
              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
              "logDir": "A String", # The directory on the VM to store logs.
              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
              "harnessCommand": "A String", # The command to launch the worker harness.
              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
                  # temporary storage.
                  #
                  # The supported resource type is:
                  #
                  # Google Cloud Storage:
                  # storage.googleapis.com/{bucket}/{object}
                  # bucket.storage.googleapis.com/{object}
              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
            },
            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
                # are supported.
            "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
                # service will attempt to choose a reasonable default.
            "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
                # the service will use the network "default".
            "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
                # will attempt to choose a reasonable default.
            "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "dataDisks": [ # Data disks that are used by a VM in this workflow.
              { # Describes the data disk used by a workflow job.
                "mountPoint": "A String", # Directory in a VM where disk is mounted.
                "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
                    # attempt to choose a reasonable default.
                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
                    # must be a disk type appropriate to the project and zone in which
                    # the workers will run. If unknown or unspecified, the service
                    # will attempt to choose a reasonable default.
                    #
                    # For example, the standard persistent disk type is a resource name
                    # typically ending in "pd-standard". If SSD persistent disks are
                    # available, the resource name typically ends with "pd-ssd". The
                    # actual valid values are defined by the Google Compute Engine API,
                    # not by the Cloud Dataflow API; consult the Google Compute Engine
                    # documentation for more information about determining the set of
                    # available disk types for a particular project and zone.
                    #
                    # Google Compute Engine Disk types are local to a particular
                    # project in a particular zone, and so the resource name will
                    # typically look something like this:
                    #
                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
              },
            ],
            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
                # `TEARDOWN_NEVER`.
                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
                # down.
                #
                # If the workers are not torn down by the service, they will
                # continue to run and use Google Compute Engine VM resources in the
                # user's project until they are explicitly terminated by the user.
                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
                # policy except for small, manually supervised test jobs.
                #
                # If unknown or unspecified, the service will attempt to choose a reasonable
                # default.
            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
                # Compute Engine API.
            "ipConfiguration": "A String", # Configuration for VM IPs.
            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
                # service will choose a number of threads (according to the number of cores
                # on the selected machine type for batch, or 1 by convention for streaming).
            "poolArgs": { # Extra arguments for this worker pool.
              "a_key": "", # Properties of the object. Contains field @type with type URL.
            },
            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
                # execute the job. If zero or unspecified, the service will
                # attempt to choose a reasonable default.
            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
                # harness, residing in Google Container Registry.
            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
                # the form "regions/REGION/subnetworks/SUBNETWORK".
            "packages": [ # Packages to be installed on workers.
              { # The packages that must be installed in order for a worker to run the
                  # steps of the Cloud Dataflow job that will be assigned to its worker
                  # pool.
                  #
                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
                  # might use this to install jars containing the user's code and all of the
                  # various dependencies (libraries, data files, etc.) required in order
                  # for that code to run.
                "location": "A String", # The resource to read the package from. The supported resource type is:
                    #
                    # Google Cloud Storage:
                    #
                    # storage.googleapis.com/{bucket}
                    # bucket.storage.googleapis.com/
                "name": "A String", # The name of the package.
              },
            ],
            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
              "algorithm": "A String", # The algorithm to use for autoscaling.
            },
            "defaultPackageSet": "A String", # The default package set to install. This allows the service to
                # select a default set of packages which are useful to worker
                # harnesses written in a particular language.
            "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
                # attempt to choose a reasonable default.
            "metadata": { # Metadata to set on the Google Compute Engine VMs.
              "a_key": "A String",
            },
          },
        ],
      },
      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
          # A description of the user pipeline and stages through which it is executed.
          # Created by Cloud Dataflow service. Only retrieved with
          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
          # form. This data is provided by the Dataflow service for ease of visualizing
          # the pipeline and interpreting Dataflow provided metrics.
        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
          { # Description of the type, names/ids, and input/outputs for a transform.
            "kind": "A String", # Type of transform.
            "name": "A String", # User provided name for this transform instance.
            "inputCollectionName": [ # User names for all collection inputs to this transform.
              "A String",
            ],
            "displayData": [ # Transform-specific display data.
              { # Data provided with a pipeline or transform to provide descriptive info.
                "shortStrValue": "A String", # A possible additional shorter value to display.
                    # For example a java_class_name_value of com.mypackage.MyDoFn
                    # will be stored with MyDoFn as the short_str_value and
                    # com.mypackage.MyDoFn as the java_class_name value.
                    # short_str_value can be displayed and java_class_name_value
                    # will be displayed as a tooltip.
                "durationValue": "A String", # Contains value if the data is of duration type.
                "url": "A String", # An optional full URL.
                "floatValue": 3.14, # Contains value if the data is of float type.
                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                    # language namespace (i.e. python module) which defines the display data.
                    # This allows a dax monitoring system to specially handle the data
                    # and perform custom rendering.
                "javaClassValue": "A String", # Contains value if the data is of java class type.
                "label": "A String", # An optional label to display in a dax UI for the element.
                "boolValue": True or False, # Contains value if the data is of a boolean type.
                "strValue": "A String", # Contains value if the data is of string type.
                "key": "A String", # The key identifying the display data.
                    # This is intended to be used as a label for the display data
                    # when viewed in a dax monitoring system.
                "int64Value": "A String", # Contains value if the data is of int64 type.
                "timestampValue": "A String", # Contains value if the data is of timestamp type.
              },
            ],
            "outputCollectionName": [ # User names for all collection outputs to this transform.
              "A String",
            ],
            "id": "A String", # SDK generated id of this transform instance.
          },
        ],
        "displayData": [ # Pipeline level display data.
          { # Data provided with a pipeline or transform to provide descriptive info.
            "shortStrValue": "A String", # A possible additional shorter value to display.
                # For example a java_class_name_value of com.mypackage.MyDoFn
                # will be stored with MyDoFn as the short_str_value and
                # com.mypackage.MyDoFn as the java_class_name value.
                # short_str_value can be displayed and java_class_name_value
                # will be displayed as a tooltip.
            "durationValue": "A String", # Contains value if the data is of duration type.
            "url": "A String", # An optional full URL.
            "floatValue": 3.14, # Contains value if the data is of float type.
            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
                # language namespace (i.e. python module) which defines the display data.
                # This allows a dax monitoring system to specially handle the data
                # and perform custom rendering.
            "javaClassValue": "A String", # Contains value if the data is of java class type.
            "label": "A String", # An optional label to display in a dax UI for the element.
            "boolValue": True or False, # Contains value if the data is of a boolean type.
            "strValue": "A String", # Contains value if the data is of string type.
            "key": "A String", # The key identifying the display data.
                # This is intended to be used as a label for the display data
                # when viewed in a dax monitoring system.
            "int64Value": "A String", # Contains value if the data is of int64 type.
            "timestampValue": "A String", # Contains value if the data is of timestamp type.
          },
        ],
        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
          { # Description of the composing transforms, names/ids, and input/outputs of a
              # stage of execution. Some composing transforms and sources may have been
              # generated by the Dataflow service during execution planning.
            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
              { # Description of an interstitial value between transforms in an execution
                  # stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "kind": "A String", # Type of transform this stage is executing.
            "name": "A String", # Dataflow service generated name for this stage.
            "outputSource": [ # Output sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "inputSource": [ # Input sources for this stage.
              { # Description of an input or output of an execution stage.
                "userName": "A String", # Human-readable name for this source; may be user or system generated.
                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
                    # source is most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
                "sizeBytes": "A String", # Size of the source, if measurable.
              },
            ],
            "componentTransform": [ # Transforms that comprise this execution stage.
              { # Description of a transform executed as part of an execution stage.
                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
                "originalTransform": "A String", # User name for the original user transform with which this transform is
                    # most closely associated.
                "name": "A String", # Dataflow service generated name for this source.
              },
            ],
            "id": "A String", # Dataflow service generated id for this stage.
          },
        ],
      },
Takashi Matsuo06694102015-09-11 13:55:40 -07003133 "steps": [ # The top-level steps that constitute the entire job.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003134 { # Defines a particular step within a Cloud Dataflow job.
3135 #
3136 # A job consists of multiple steps, each of which performs some
3137 # specific operation as part of the overall job. Data is typically
3138 # passed from one step to another as part of the job.
3139 #
3140 # Here's an example of a sequence of steps which together implement a
3141 # Map-Reduce job:
3142 #
3143 # * Read a collection of data from some source, parsing the
3144 # collection's elements.
3145 #
3146 # * Validate the elements.
3147 #
3148 # * Apply a user-defined function to map each element to some value
3149 # and extract an element-specific key value.
3150 #
3151 # * Group elements with the same key into a single element with
3152 # that key, transforming a multiply-keyed collection into a
3153 # uniquely-keyed collection.
3154 #
3155 # * Write the elements out to some data sink.
3156 #
3157 # Note that the Cloud Dataflow service may be used to run many different
3158 # types of jobs, not just Map-Reduce.
3159 "kind": "A String", # The kind of step in the Cloud Dataflow job.
3160 "properties": { # Named properties associated with the step. Each kind of
3161 # predefined step has its own required set of properties.
3162 # Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
Takashi Matsuo06694102015-09-11 13:55:40 -07003163 "a_key": "", # Properties of the object.
3164 },
Thomas Coffee2f245372017-03-27 10:39:26 -07003165 "name": "A String", # The name that identifies the step. This must be unique for each
3166 # step with respect to all other steps in the Cloud Dataflow job.
Takashi Matsuo06694102015-09-11 13:55:40 -07003167 },
3168 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04003169 "currentStateTime": "A String", # The timestamp associated with the current state.
Sai Cheemalapatic30d2b52017-03-13 12:12:03 -04003170 "tempFiles": [ # A set of files the system should be aware of that are used
3171 # for temporary storage. These temporary files will be
3172 # removed on job completion.
3173 # No duplicates are allowed.
3174 # No file patterns are supported.
3175 #
3176 # The supported files are:
3177 #
3178 # Google Cloud Storage:
3179 #
3180 # storage.googleapis.com/{bucket}/{object}
3181 # bucket.storage.googleapis.com/{object}
Jon Wayne Parrott36e41bc2016-02-19 16:02:29 -08003182 "A String",
3183 ],
Sai Cheemalapati4ba8c232017-06-06 18:46:08 -04003184 "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3185 # callers cannot mutate it.
3186 { # A message describing the state of a particular execution stage.
3187 "executionStageName": "A String", # The name of the execution stage.
        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
      },
    ],
    "type": "A String", # The type of Cloud Dataflow job.
    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
        # Cloud Dataflow service.
    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
        # of the job it replaced.
        #
        # When sending a `CreateJobRequest`, you can update a job by specifying it
        # here. The job named here is stopped, and its intermediate state is
        # transferred to this job.
    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
        # isn't contained in the submitted job.
      "stages": { # A mapping from each stage to the information about that stage.
        "a_key": { # Contains information about how a particular
            # google.dataflow.v1beta3.Step will be executed.
          "stepName": [ # The steps associated with the execution stage.
              # Note that stages may have several steps, and that a given step
              # might be run by more than one stage.
            "A String",
          ],
        },
      },
    },
  }</pre>
</div>
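<p>The <code>steps</code> list in the job resource above is the job's transform graph. As a minimal sketch (the helper function and the sample job dict below are hypothetical, not part of the API), a job dict shaped like the documented structure, such as one returned by <code>projects().jobs().get()</code>, can be summarized like this:</p>

```python
# Hedged sketch: summarize the top-level "steps" of a Dataflow job resource.
# The field names ("steps", "kind", "name") come from the structure documented
# above; the sample job below is illustrative, not real service output.

def summarize_steps(job):
    """Return (kind, name) pairs for each top-level step in a job resource."""
    return [(step.get("kind", ""), step.get("name", ""))
            for step in job.get("steps", [])]

sample_job = {
    "type": "JOB_TYPE_BATCH",
    "steps": [
        {"kind": "ParallelRead", "name": "s1", "properties": {"format": "text"}},
        {"kind": "ParallelDo", "name": "s2", "properties": {}},
    ],
}

print(summarize_steps(sample_job))  # [('ParallelRead', 's1'), ('ParallelDo', 's2')]
```

<p>Note that <code>properties</code> is only populated when the job is retrieved with <code>JOB_VIEW_ALL</code>, as the schema comment above states, so a summary helper should not assume it is present.</p>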

</body></html>