Blame - docs/dyn/dataflow_v1b3.projects.templates.html - platform/external/python/google-api-python-client

2017-03-13 12:12:03 -0400

[diff] [blame]

79

<p class="firstline">Creates a Cloud Dataflow job from a template.</p>

80

81

<code><a href="#get">get(projectId, gcsPath=None, x__xgafv=None, view=None)</a></code></p>

82

<p class="firstline">Get the template associated with a template.</p>

83

84

<code><a href="#launch">launch(projectId, body, dryRun=None, gcsPath=None, x__xgafv=None)</a></code></p>

85

<p class="firstline">Launch a template.</p>

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

86

<h3>Method Details</h3>

87

88

<code class="details" id="create">create(projectId, body, x__xgafv=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

89

<pre>Creates a Cloud Dataflow job from a template.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

90

91

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

92

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

93

body: object, The request body. (required)

94

The object takes the form of:

95

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

96

{ # A request to create a Cloud Dataflow job from a template.

97

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

98

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

99

# Use with caution.

100

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

101

# Must be a valid Cloud Storage URL, beginning with `gs://`.

102

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

103

"zone": "A String", # The Compute Engine [availability zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

104

# for launching worker instances to run your pipeline.

105

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

106

# available to your pipeline during execution, from 1 to 1000.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

107

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

108

"gcsPath": "A String", # Required. A Cloud Storage path to the template from which to

109

# create the job.

110

# Must be a valid Cloud Storage URL, beginning with `gs://`.

111

"parameters": { # The runtime parameters to pass to the job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

112

"a_key": "A String",

113

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

114

"jobName": "A String", # Required. The job name to use for the created job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

115

}

116

117

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

118

Allowed values

119

1 - v1 error format

120

2 - v2 error format

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

121

122

Returns:

123

An object of the form:

124

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

125

{ # Defines a job to be run by the Cloud Dataflow service.

126

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

127

# If this field is set, the service will ensure its uniqueness.

128

# The request to create a job will fail if the service has knowledge of a

129

# previously submitted job with the same client's ID and job name.

130

# The caller may use this field to ensure idempotence of job

131

# creation across retried attempts to create a job.

132

# By default, the field is empty and, in that case, the service ignores it.

133

"requestedState": "A String", # The job's requested state.

134

#

135

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

136

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

137

# also be used to directly set a job's requested state to

138

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

139

# job if it has not already reached a terminal state.

140

"name": "A String", # The user-specified Cloud Dataflow job name.

141

#

142

# Only one Job with a given name may exist in a project at any

143

# given time. If a caller attempts to create a Job with the same

144

# name as an already-existing Job, the attempt returns the

145

# existing Job.

146

#

147

# The name must match the regular expression

148

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

149

"currentStateTime": "A String", # The timestamp associated with the current state.

150

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

151

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

152

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

153

"labels": { # User-defined labels for this job.

154

#

155

# The labels map can contain no more than 64 entries. Entries of the labels

156

# map are UTF8 strings that comply with the following restrictions:

157

#

158

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

159

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

160

# * Both keys and values are additionally constrained to be <= 128 bytes in

161

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

162

"a_key": "A String",

163

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

164

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

165

# corresponding name prefixes of the new job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

166

"a_key": "A String",

167

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

168

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

169

# Cloud Dataflow service.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

170

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

171

"version": { # A structure describing which components and their versions of the service

172

# are required in order to run the job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

173

"a_key": "", # Properties of the object.

174

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

175

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

176

# storage. The system will append the suffix "/temp-{JOBNAME}" to

177

# this resource prefix, where {JOBNAME} is the value of the

178

# job_name field. The resulting bucket and object prefix is used

179

# as the prefix of the resources used to store temporary data

180

# needed during the job execution. NOTE: This will override the

181

# value in taskrunner_settings.

182

# The supported resource type is:

183

#

184

# Google Cloud Storage:

185

#

186

# storage.googleapis.com/{bucket}/{object}

187

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

188

"internalExperiments": { # Experimental settings.

189

"a_key": "", # Properties of the object. Contains field @type with type URL.

190

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

191

"dataset": "A String", # The dataset for the current project where various workflow

192

# related tables are stored.

193

#

194

# The supported resource type is:

195

#

196

# Google BigQuery:

197

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

198

"experiments": [ # The list of experiments to enable.

199

"A String",

200

],

Sai Cheemalapati

ea3a5e1

2016-10-12 14:05:53 -0700

[diff] [blame]

201

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

202

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

203

# options are passed through the service and are used to recreate the

204

# SDK pipeline options on the worker in a language agnostic and platform

205

# independent way.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

206

"a_key": "", # Properties of the object.

207

},

208

"userAgent": { # A description of the process that generated the request.

209

"a_key": "", # Properties of the object.

210

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

211

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

212

# unspecified, the service will attempt to choose a reasonable

213

# default. This should be in the form of the API service name,

214

# e.g. "compute.googleapis.com".

215

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

216

# specified in order for the job to have workers.

217

{ # Describes one particular pool of Cloud Dataflow workers to be

218

# instantiated by the Cloud Dataflow service in order to perform the

219

# computations required by a job. Note that a workflow job may use

220

# multiple pools, in order to match the various computational

221

# requirements of the various stages of the job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

222

"diskSourceImage": "A String", # Fully qualified source image for disks.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

223

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

224

# using the standard Dataflow task runner. Users should ignore

225

# this field.

226

"workflowFileName": "A String", # The file to store the workflow in.

227

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

228

# will not be uploaded.

229

#

230

# The supported resource type is:

231

#

232

# Google Cloud Storage:

233

# storage.googleapis.com/{bucket}/{object}

234

# bucket.storage.googleapis.com/{object}

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

235

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

236

# taskrunner; e.g. "root".

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

237

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

238

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

239

"vmId": "A String", # The ID string of the VM.

240

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

241

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

242

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

243

# access the Cloud Dataflow API.

244

"A String",

245

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

246

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

247

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

248

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

249

# "shuffle/v1beta1".

250

"workerId": "A String", # The ID of the worker running this pipeline.

251

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

252

#

253

# When workers access Google Cloud APIs, they logically do so via

254

# relative URLs. If this field is specified, it supplies the base

255

# URL to use for resolving these relative URLs. The normative

256

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

257

# Locators".

258

#

259

# If not specified, the default value is "http://www.googleapis.com/"

260

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

261

# "dataflow/v1b3/projects".

262

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

263

# storage.

264

#

265

# The supported resource type is:

266

#

267

# Google Cloud Storage:

268

#

269

# storage.googleapis.com/{bucket}/{object}

270

# bucket.storage.googleapis.com/{object}

271

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

272

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

273

# taskrunner; e.g. "wheel".

274

"languageHint": "A String", # The suggested backend language.

275

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

276

# console.

277

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

278

"logDir": "A String", # The directory on the VM to store logs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

279

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

280

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

281

#

282

# When workers access Google Cloud APIs, they logically do so via

283

# relative URLs. If this field is specified, it supplies the base

284

# URL to use for resolving these relative URLs. The normative

285

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

286

# Locators".

287

#

288

# If not specified, the default value is "http://www.googleapis.com/"

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

289

"harnessCommand": "A String", # The command to launch the worker harness.

290

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

291

# temporary storage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

292

#

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

293

# The supported resource type is:

294

#

295

# Google Cloud Storage:

296

# storage.googleapis.com/{bucket}/{object}

297

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

298

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

299

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

300

# are supported.

301

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

302

# service will attempt to choose a reasonable default.

303

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

304

# the service will use the network "default".

305

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

306

# will attempt to choose a reasonable default.

307

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

308

# attempt to choose a reasonable default.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

309

"dataDisks": [ # Data disks that are used by a VM in this workflow.

310

{ # Describes the data disk used by a workflow job.

311

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

312

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

313

# attempt to choose a reasonable default.

314

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

315

# must be a disk type appropriate to the project and zone in which

316

# the workers will run. If unknown or unspecified, the service

317

# will attempt to choose a reasonable default.

318

#

319

# For example, the standard persistent disk type is a resource name

320

# typically ending in "pd-standard". If SSD persistent disks are

321

# available, the resource name typically ends with "pd-ssd". The

322

# actual valid values are defined by the Google Compute Engine API,

323

# not by the Cloud Dataflow API; consult the Google Compute Engine

324

# documentation for more information about determining the set of

325

# available disk types for a particular project and zone.

326

#

327

# Google Compute Engine Disk types are local to a particular

328

# project in a particular zone, and so the resource name will

329

# typically look something like this:

330

#

331

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

332

},

333

],

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

334

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

335

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

336

# `TEARDOWN_NEVER`.

337

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

338

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

339

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

340

# down.

341

#

342

# If the workers are not torn down by the service, they will

343

# continue to run and use Google Compute Engine VM resources in the

344

# user's project until they are explicitly terminated by the user.

345

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

346

# policy except for small, manually supervised test jobs.

347

#

348

# If unknown or unspecified, the service will attempt to choose a reasonable

349

# default.

350

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

351

# Compute Engine API.

352

"ipConfiguration": "A String", # Configuration for VM IPs.

353

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

354

# service will choose a number of threads (according to the number of cores

355

# on the selected machine type for batch, or 1 by convention for streaming).

356

"poolArgs": { # Extra arguments for this worker pool.

357

"a_key": "", # Properties of the object. Contains field @type with type URL.

358

},

359

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

360

# execute the job. If zero or unspecified, the service will

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

361

# attempt to choose a reasonable default.

362

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

363

# harness, residing in Google Container Registry.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

364

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

365

# the form "regions/REGION/subnetworks/SUBNETWORK".

366

"packages": [ # Packages to be installed on workers.

367

{ # The packages that must be installed in order for a worker to run the

368

# steps of the Cloud Dataflow job that will be assigned to its worker

369

# pool.

370

#

371

# This is the mechanism by which the Cloud Dataflow SDK causes code to

372

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

373

# might use this to install jars containing the user's code and all of the

374

# various dependencies (libraries, data files, etc.) required in order

375

# for that code to run.

376

"location": "A String", # The resource to read the package from. The supported resource type is:

377

#

378

# Google Cloud Storage:

379

#

380

# storage.googleapis.com/{bucket}

381

# bucket.storage.googleapis.com/

382

"name": "A String", # The name of the package.

383

},

384

],

385

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

386

"algorithm": "A String", # The algorithm to use for autoscaling.

387

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

388

},

389

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

390

# select a default set of packages which are useful to worker

391

# harnesses written in a particular language.

392

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

393

# attempt to choose a reasonable default.

394

"metadata": { # Metadata to set on the Google Compute Engine VMs.

395

"a_key": "A String",

396

},

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

397

},

398

],

399

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

400

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

401

# of the job it replaced.

402

#

403

# When sending a `CreateJobRequest`, you can update a job by specifying it

404

# here. The job named here is stopped, and its intermediate state is

405

# transferred to this job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

406

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

407

# A description of the user pipeline and stages through which it is executed.

408

# Created by Cloud Dataflow service. Only retrieved with

409

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

410

# form. This data is provided by the Dataflow service for ease of visualizing

411

# the pipeline and interpreting Dataflow-provided metrics.

412

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

413

{ # Description of the type, names/ids, and input/outputs for a transform.

414

"kind": "A String", # Type of transform.

415

"name": "A String", # User provided name for this transform instance.

416

"inputCollectionName": [ # User names for all collection inputs to this transform.

417

"A String",

418

],

419

"displayData": [ # Transform-specific display data.

420

{ # Data provided with a pipeline or transform to provide descriptive info.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

421

"shortStrValue": "A String", # A possible additional shorter value to display.

422

# For example a java_class_name_value of com.mypackage.MyDoFn

423

# will be stored with MyDoFn as the short_str_value and

424

# com.mypackage.MyDoFn as the java_class_name value.

425

# short_str_value can be displayed and java_class_name_value

426

# will be displayed as a tooltip.

427

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

428

"durationValue": "A String", # Contains value if the data is of duration type.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

429

"url": "A String", # An optional full URL.

430

"floatValue": 3.14, # Contains value if the data is of float type.

431

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

432

# language namespace (i.e. python module) which defines the display data.

433

# This allows a dax monitoring system to specially handle the data

434

# and perform custom rendering.

435

"javaClassValue": "A String", # Contains value if the data is of java class type.

436

"label": "A String", # An optional label to display in a dax UI for the element.

437

"boolValue": True or False, # Contains value if the data is of a boolean type.

438

"strValue": "A String", # Contains value if the data is of string type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

439

"key": "A String", # The key identifying the display data.

440

# This is intended to be used as a label for the display data

441

# when viewed in a dax monitoring system.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

442

"int64Value": "A String", # Contains value if the data is of int64 type.

443

},

444

],

445

"outputCollectionName": [ # User names for all collection outputs to this transform.

446

"A String",

447

],

448

"id": "A String", # SDK generated id of this transform instance.

449

},

450

],

451

"displayData": [ # Pipeline level display data.

452

{ # Data provided with a pipeline or transform to provide descriptive info.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

453

"shortStrValue": "A String", # A possible additional shorter value to display.

454

# For example a java_class_name_value of com.mypackage.MyDoFn

455

# will be stored with MyDoFn as the short_str_value and

456

# com.mypackage.MyDoFn as the java_class_name value.

457

# short_str_value can be displayed and java_class_name_value

458

# will be displayed as a tooltip.

459

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

460

"durationValue": "A String", # Contains value if the data is of duration type.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

461

"url": "A String", # An optional full URL.

462

"floatValue": 3.14, # Contains value if the data is of float type.

463

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

464

# language namespace (i.e. python module) which defines the display data.

465

# This allows a dax monitoring system to specially handle the data

466

# and perform custom rendering.

467

"javaClassValue": "A String", # Contains value if the data is of java class type.

468

"label": "A String", # An optional label to display in a dax UI for the element.

469

"boolValue": True or False, # Contains value if the data is of a boolean type.

470

"strValue": "A String", # Contains value if the data is of string type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

471

"key": "A String", # The key identifying the display data.

472

# This is intended to be used as a label for the display data

473

# when viewed in a dax monitoring system.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

474

"int64Value": "A String", # Contains value if the data is of int64 type.

475

},

476

],

477

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

478

{ # Description of the composing transforms, names/ids, and input/outputs of a

479

# stage of execution. Some composing transforms and sources may have been

480

# generated by the Dataflow service during execution planning.

481

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

482

{ # Description of an interstitial value between transforms in an execution

483

# stage.

484

"userName": "A String", # Human-readable name for this source; may be user or system generated.

485

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

486

# source is most closely associated.

487

"name": "A String", # Dataflow service generated name for this source.

488

},

489

],

490

"kind": "A String", # Type of transform this stage is executing.

491

"name": "A String", # Dataflow service generated name for this stage.

492

"outputSource": [ # Output sources for this stage.

493

{ # Description of an input or output of an execution stage.

494

"userName": "A String", # Human-readable name for this source; may be user or system generated.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

495

"sizeBytes": "A String", # Size of the source, if measurable.

496

"name": "A String", # Dataflow service generated name for this source.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

497

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

498

# source is most closely associated.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

499

},

500

],

501

"inputSource": [ # Input sources for this stage.

502

{ # Description of an input or output of an execution stage.

503

"userName": "A String", # Human-readable name for this source; may be user or system generated.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

504

"sizeBytes": "A String", # Size of the source, if measurable.

505

"name": "A String", # Dataflow service generated name for this source.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

506

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

507

# source is most closely associated.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

508

},

509

],

510

"componentTransform": [ # Transforms that comprise this execution stage.

511

{ # Description of a transform executed as part of an execution stage.

512

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

513

"originalTransform": "A String", # User name for the original user transform with which this transform is

514

# most closely associated.

515

"name": "A String", # Dataflow service generated name for this transform.

516

},

517

],

518

"id": "A String", # Dataflow service generated id for this stage.

519

},

520

],

521

},

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

522

"steps": [ # The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

523

{ # Defines a particular step within a Cloud Dataflow job.

524

#

525

# A job consists of multiple steps, each of which performs some

526

# specific operation as part of the overall job. Data is typically

527

# passed from one step to another as part of the job.

528

#

529

# Here's an example of a sequence of steps which together implement a

530

# Map-Reduce job:

531

#

532

# * Read a collection of data from some source, parsing the

533

# collection's elements.

534

#

535

# * Validate the elements.

536

#

537

# * Apply a user-defined function to map each element to some value

538

# and extract an element-specific key value.

539

#

540

# * Group elements with the same key into a single element with

541

# that key, transforming a multiply-keyed collection into a

542

# uniquely-keyed collection.

543

#

544

# * Write the elements out to some data sink.

545

#

546

# Note that the Cloud Dataflow service may be used to run many different

547

# types of jobs, not just Map-Reduce.

548

"kind": "A String", # The kind of step in the Cloud Dataflow job.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

549

"name": "A String", # The name that identifies the step. This must be unique for each

550

# step with respect to all other steps in the Cloud Dataflow job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

551

"properties": { # Named properties associated with the step. Each kind of

552

# predefined step has its own required set of properties.

553

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

554

"a_key": "", # Properties of the object.

555

},

556

},

557

],

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

558

"location": "A String", # The location that contains this job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

559

"tempFiles": [ # A set of files the system should be aware of that are used

560

# for temporary storage. These temporary files will be

561

# removed on job completion.

562

# No duplicates are allowed.

563

# No file patterns are supported.

564

#

565

# The supported files are:

566

#

567

# Google Cloud Storage:

568

#

569

# storage.googleapis.com/{bucket}/{object}

570

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

571

"A String",

572

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

573

"type": "A String", # The type of Cloud Dataflow job.

574

"id": "A String", # The unique ID of this job.

575

#

576

# This field is set by the Cloud Dataflow service when the Job is

577

# created, and is immutable for the life of the job.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

578

"currentState": "A String", # The current state of the job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

579

#

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

580

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

581

# specified.

582

#

583

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

584

# terminal state. After a job has reached a terminal state, no

585

# further state updates may be made.

586

#

587

# This field may be mutated by the Cloud Dataflow service;

588

# callers cannot mutate it.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

589

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

590

# isn't contained in the submitted job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

591

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

592

"a_key": { # Contains information about how a particular

593

# google.dataflow.v1beta3.Step will be executed.

594

"stepName": [ # The steps associated with the execution stage.

595

# Note that stages may have several steps, and that a given step

596

# might be run by more than one stage.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

"A String",

],

},

},

},

}</pre>

</div>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

605

606

<code class="details" id="get">get(projectId, gcsPath=None, x__xgafv=None, view=None)</code>

607

<pre>Get the template associated with a template.

608

609

Args:

610

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

611

gcsPath: string, Required. A Cloud Storage path to the template from which to

612

create the job.

613

Must be a valid Cloud Storage URL, beginning with `gs://`.

614

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

view: string, The view to retrieve. Defaults to METADATA_ONLY.

619

620

Returns:

621

An object of the form:

622

623

{ # The response to a GetTemplate request.

624

"status": { # The `Status` type defines a logical error model that is suitable for different # The status of the get template request. Any problems with the

625

# request will be indicated in the error_details.

626

# programming environments, including REST APIs and RPC APIs. It is used by

627

# [gRPC](https://github.com/grpc). The error model is designed to be:

628

#

629

# - Simple to use and understand for most users

630

# - Flexible enough to meet unexpected needs

#

# # Overview

#

# The `Status` message contains three pieces of data: error code, error message,

635

# and error details. The error code should be an enum value of

636

# google.rpc.Code, but it may accept additional error codes if needed. The

637

# error message should be a developer-facing English message that helps

638

# developers *understand* and *resolve* the error. If a localized user-facing

639

# error message is needed, put the localized message in the error details or

640

# localize it in the client. The optional error details may contain arbitrary

641

# information about the error. There is a predefined set of error detail types

642

# in the package `google.rpc` which can be used for common error conditions.

#

# # Language mapping

#

# The `Status` message is the logical representation of the error model, but it

647

# is not necessarily the actual wire format. When the `Status` message is

648

# exposed in different client libraries and different wire protocols, it can be

649

# mapped differently. For example, it will likely be mapped to some exceptions

650

# in Java, but more likely mapped to some error codes in C.

#

# # Other uses

#

# The error model and the `Status` message can be used in a variety of

655

# environments, either with or without APIs, to provide a

656

# consistent developer experience across different environments.

657

#

658

# Example uses of this error model include:

659

#

660

# - Partial errors. If a service needs to return partial errors to the client,

661

# it may embed the `Status` in the normal response to indicate the partial

662

# errors.

663

#

664

# - Workflow errors. A typical workflow has multiple steps. Each step may

665

# have a `Status` message for error reporting purposes.

666

#

667

# - Batch operations. If a client uses batch request and batch response, the

668

# `Status` message should be used directly inside batch response, one for

669

# each error sub-response.

670

#

671

# - Asynchronous operations. If an API call embeds asynchronous operation

672

# results in its response, the status of those operations should be

673

# represented directly using the `Status` message.

674

#

675

# - Logging. If some API errors are stored in logs, the message `Status` could

676

# be used directly after any stripping needed for security/privacy reasons.

677

"message": "A String", # A developer-facing error message, which should be in English. Any

678

# user-facing error message should be localized and sent in the

679

# google.rpc.Status.details field, or localized by the client.

680

"code": 42, # The status code, which should be an enum value of google.rpc.Code.

681

"details": [ # A list of messages that carry the error details. There will be a

682

# common set of message types for APIs to use.

683

{

684

"a_key": "", # Properties of the object. Contains field @type with type URL.

},

],

},

"metadata": { # Metadata describing a template. # The template metadata describing the template name, available

689

# parameters, etc.

690

"bypassTempDirValidation": True or False, # If true, will bypass the validation that the temp directory is

691

# writable. This should only be used with templates for pipelines

692

# that are guaranteed not to need to write to the temp directory,

693

# which is subject to change based on the optimizer.

694

"name": "A String", # Required. The name of the template.

695

"parameters": [ # The parameters for the template.

696

{ # Metadata for a specific parameter.

697

"regexes": [ # Optional. Regexes that the parameter must match.

698

"A String",

699

],

700

"helpText": "A String", # Required. The help text to display for the parameter.

701

"name": "A String", # Required. The name of the parameter.

702

"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.

703

"label": "A String", # Required. The label to display for the parameter.

704

},

705

],

706

"description": "A String", # Optional. A description of the template.

},

}</pre>

</div>

<code class="details" id="launch">launch(projectId, body, dryRun=None, gcsPath=None, x__xgafv=None)</code>

713

<pre>Launch a template.

714

715

Args:

716

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

717

body: object, The request body. (required)

718

The object takes the form of:

719

720

{ # Parameters to provide to the template being launched.

721

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

722

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

723

# Use with caution.

724

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

725

# Must be a valid Cloud Storage URL, beginning with `gs://`.

726

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

727

"zone": "A String", # The Compute Engine [availability zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

728

# for launching worker instances to run your pipeline.

729

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

730

# available to your pipeline during execution, from 1 to 1000.

731

},

732

"parameters": { # The runtime parameters to pass to the job.

733

"a_key": "A String",

734

},

735

"jobName": "A String", # Required. The job name to use for the created job.

736

}

737

738

dryRun: boolean, Whether or not the job should actually be executed after

739

validating parameters. Defaults to false. Validation errors do

740

not cause the HTTP request to fail if true.

741

gcsPath: string, Required. A Cloud Storage path to the template from which to create

742

the job.

743

Must be a valid Cloud Storage URL, beginning with 'gs://'.

744

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

Returns:

An object of the form:

751

752

{ # Response to the request to launch a template.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

753

"job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and

754

# the job was successfully launched.

755

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

756

# If this field is set, the service will ensure its uniqueness.

757

# The request to create a job will fail if the service has knowledge of a

758

# previously submitted job with the same client's ID and job name.

759

# The caller may use this field to ensure idempotence of job

760

# creation across retried attempts to create a job.

761

# By default, the field is empty and, in that case, the service ignores it.

762

"requestedState": "A String", # The job's requested state.

763

#

764

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

765

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

766

# also be used to directly set a job's requested state to

767

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

768

# job if it has not already reached a terminal state.

769

"name": "A String", # The user-specified Cloud Dataflow job name.

770

#

771

# Only one Job with a given name may exist in a project at any

772

# given time. If a caller attempts to create a Job with the same

773

# name as an already-existing Job, the attempt returns the

774

# existing Job.

775

#

776

# The name must match the regular expression

777

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

778

"currentStateTime": "A String", # The timestamp associated with the current state.

779

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

780

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

781

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

782

"labels": { # User-defined labels for this job.

783

#

784

# The labels map can contain no more than 64 entries. Entries of the labels

785

# map are UTF8 strings that comply with the following restrictions:

786

#

787

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

788

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

789

# * Both keys and values are additionally constrained to be <= 128 bytes in

790

# size.

791

"a_key": "A String",

792

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

793

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

794

# corresponding name prefixes of the new job.

795

"a_key": "A String",

796

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

797

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

798

# Cloud Dataflow service.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

799

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

800

"version": { # A structure describing which components and their versions of the service

801

# are required in order to run the job.

802

"a_key": "", # Properties of the object.

803

},

804

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

805

# storage. The system will append the suffix "/temp-{JOBNAME}" to

806

# this resource prefix, where {JOBNAME} is the value of the

807

# job_name field. The resulting bucket and object prefix is used

808

# as the prefix of the resources used to store temporary data

809

# needed during the job execution. NOTE: This will override the

810

# value in taskrunner_settings.

811

# The supported resource type is:

812

#

813

# Google Cloud Storage:

814

#

815

# storage.googleapis.com/{bucket}/{object}

816

# bucket.storage.googleapis.com/{object}

817

"internalExperiments": { # Experimental settings.

818

"a_key": "", # Properties of the object. Contains field @type with type URL.

819

},

820

"dataset": "A String", # The dataset for the current project where various workflow

821

# related tables are stored.

822

#

823

# The supported resource type is:

824

#

825

# Google BigQuery:

826

# bigquery.googleapis.com/{dataset}

827

"experiments": [ # The list of experiments to enable.

828

"A String",

829

],

830

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

831

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

832

# options are passed through the service and are used to recreate the

833

# SDK pipeline options on the worker in a language agnostic and platform

834

# independent way.

835

"a_key": "", # Properties of the object.

836

},

837

"userAgent": { # A description of the process that generated the request.

838

"a_key": "", # Properties of the object.

839

},

840

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

841

# unspecified, the service will attempt to choose a reasonable

842

# default. This should be in the form of the API service name,

843

# e.g. "compute.googleapis.com".

844

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

845

# specified in order for the job to have workers.

846

{ # Describes one particular pool of Cloud Dataflow workers to be

847

# instantiated by the Cloud Dataflow service in order to perform the

848

# computations required by a job. Note that a workflow job may use

849

# multiple pools, in order to match the various computational

850

# requirements of the various stages of the job.

851

"diskSourceImage": "A String", # Fully qualified source image for disks.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

852

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

853

# using the standard Dataflow task runner. Users should ignore

854

# this field.

855

"workflowFileName": "A String", # The file to store the workflow in.

856

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

857

# will not be uploaded.

858

#

859

# The supported resource type is:

860

#

861

# Google Cloud Storage:

862

# storage.googleapis.com/{bucket}/{object}

863

# bucket.storage.googleapis.com/{object}

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

864

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

865

# taskrunner; e.g. "root".

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

866

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

867

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

868

"vmId": "A String", # The ID string of the VM.

869

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

870

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

871

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

872

# access the Cloud Dataflow API.

873

"A String",

874

],

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

875

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

876

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

877

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

878

# "shuffle/v1beta1".

879

"workerId": "A String", # The ID of the worker running this pipeline.

880

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

881

#

882

# When workers access Google Cloud APIs, they logically do so via

883

# relative URLs. If this field is specified, it supplies the base

884

# URL to use for resolving these relative URLs. The normative

885

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

886

# Locators".

887

#

888

# If not specified, the default value is "http://www.googleapis.com/"

889

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

890

# "dataflow/v1b3/projects".

891

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

892

# storage.

893

#

894

# The supported resource type is:

895

#

896

# Google Cloud Storage:

897

#

898

# storage.googleapis.com/{bucket}/{object}

899

# bucket.storage.googleapis.com/{object}

900

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

901

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

902

# taskrunner; e.g. "wheel".

903

"languageHint": "A String", # The suggested backend language.

904

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

905

# console.

906

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

907

"logDir": "A String", # The directory on the VM to store logs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

908

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

909

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

910

#

911

# When workers access Google Cloud APIs, they logically do so via

912

# relative URLs. If this field is specified, it supplies the base

913

# URL to use for resolving these relative URLs. The normative

914

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

915

# Locators".

916

#

917

# If not specified, the default value is "http://www.googleapis.com/"

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

918

"harnessCommand": "A String", # The command to launch the worker harness.

919

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

920

# temporary storage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

921

#

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

922

# The supported resource type is:

923

#

924

# Google Cloud Storage:

925

# storage.googleapis.com/{bucket}/{object}

926

# bucket.storage.googleapis.com/{object}

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

927

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

928

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

929

# are supported.

930

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

931

# service will attempt to choose a reasonable default.

932

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

933

# the service will use the network "default".

934

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

935

# will attempt to choose a reasonable default.

936

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

937

# attempt to choose a reasonable default.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

938

"dataDisks": [ # Data disks that are used by a VM in this workflow.

939

{ # Describes the data disk used by a workflow job.

940

"mountPoint": "A String", # Directory in a VM where disk is mounted.

941

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

942

# attempt to choose a reasonable default.

943

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

944

# must be a disk type appropriate to the project and zone in which

945

# the workers will run. If unknown or unspecified, the service

946

# will attempt to choose a reasonable default.

947

#

948

# For example, the standard persistent disk type is a resource name

949

# typically ending in "pd-standard". If SSD persistent disks are

950

# available, the resource name typically ends with "pd-ssd". The

951

# actual valid values are defined by the Google Compute Engine API,

952

# not by the Cloud Dataflow API; consult the Google Compute Engine

953

# documentation for more information about determining the set of

954

# available disk types for a particular project and zone.

955

#

956

# Google Compute Engine Disk types are local to a particular

957

# project in a particular zone, and so the resource name will

958

# typically look something like this:

959

#

960

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

961

},

962

],

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

963

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

964

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

965

# `TEARDOWN_NEVER`.

966

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

967

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

968

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

969

# down.

970

#

971

# If the workers are not torn down by the service, they will

972

# continue to run and use Google Compute Engine VM resources in the

973

# user's project until they are explicitly terminated by the user.

974

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

975

# policy except for small, manually supervised test jobs.

976

#

977

# If unknown or unspecified, the service will attempt to choose a reasonable

978

# default.

979

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

980

# Compute Engine API.

981

"ipConfiguration": "A String", # Configuration for VM IPs.

982

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

983

# service will choose a number of threads (according to the number of cores

984

# on the selected machine type for batch, or 1 by convention for streaming).

985

"poolArgs": { # Extra arguments for this worker pool.

986

"a_key": "", # Properties of the object. Contains field @type with type URL.

987

},

988

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

989

# execute the job. If zero or unspecified, the service will

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

990

# attempt to choose a reasonable default.

991

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

992

# harness, residing in Google Container Registry.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

993

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

994

# the form "regions/REGION/subnetworks/SUBNETWORK".

995

"packages": [ # Packages to be installed on workers.

996

{ # The packages that must be installed in order for a worker to run the

997

# steps of the Cloud Dataflow job that will be assigned to its worker

998

# pool.

999

#

1000

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1001

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1002

# might use this to install jars containing the user's code and all of the

1003

# various dependencies (libraries, data files, etc.) required in order

1004

# for that code to run.

1005

"location": "A String", # The resource to read the package from. The supported resource type is:

1006

#

1007

# Google Cloud Storage:

1008

#

1009

# storage.googleapis.com/{bucket}

1010

# bucket.storage.googleapis.com/

1011

"name": "A String", # The name of the package.

1012

},

1013

],

1014

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

1015

"algorithm": "A String", # The algorithm to use for autoscaling.

1016

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

1017

},

1018

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1019

# select a default set of packages which are useful to worker

1020

# harnesses written in a particular language.

1021

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

1022

# attempt to choose a reasonable default.

1023

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1024

"a_key": "A String",

1025

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1026

},

1027

],

1028

},

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1029

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1030

# of the job it replaced.

1031

#

1032

# When sending a `CreateJobRequest`, you can update a job by specifying it

1033

# here. The job named here is stopped, and its intermediate state is

1034

# transferred to this job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1035

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1036

# A description of the user pipeline and stages through which it is executed.

1037

# Created by Cloud Dataflow service. Only retrieved with

1038

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1039

# form. This data is provided by the Dataflow service for ease of visualizing

1040

# the pipeline and interpreting Dataflow provided metrics.

1041

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1042

{ # Description of the type, names/ids, and input/outputs for a transform.

1043

"kind": "A String", # Type of transform.

1044

"name": "A String", # User provided name for this transform instance.

1045

"inputCollectionName": [ # User names for all collection inputs to this transform.

1046

"A String",

1047

],

1048

"displayData": [ # Transform-specific display data.

1049

{ # Data provided with a pipeline or transform to provide descriptive info.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1050

"shortStrValue": "A String", # A possible additional shorter value to display.

1051

# For example a java_class_name_value of com.mypackage.MyDoFn

1052

# will be stored with MyDoFn as the short_str_value and

1053

# com.mypackage.MyDoFn as the java_class_name value.

1054

# short_str_value can be displayed and java_class_name_value

1055

# will be displayed as a tooltip.

1056

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1057

"durationValue": "A String", # Contains value if the data is of duration type.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1058

"url": "A String", # An optional full URL.

1059

"floatValue": 3.14, # Contains value if the data is of float type.

1060

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1061

# language namespace (e.g. a Python module) which defines the display data.

1062

# This allows a dax monitoring system to specially handle the data

1063

# and perform custom rendering.

1064

"javaClassValue": "A String", # Contains value if the data is of java class type.

1065

"label": "A String", # An optional label to display in a dax UI for the element.

1066

"boolValue": True or False, # Contains value if the data is of a boolean type.

1067

"strValue": "A String", # Contains value if the data is of string type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1068

"key": "A String", # The key identifying the display data.

1069

# This is intended to be used as a label for the display data

1070

# when viewed in a dax monitoring system.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1071

"int64Value": "A String", # Contains value if the data is of int64 type.

1072

},

1073

],

1074

"outputCollectionName": [ # User names for all collection outputs to this transform.

1075

"A String",

1076

],

1077

"id": "A String", # SDK generated id of this transform instance.

1078

},

1079

],

1080

"displayData": [ # Pipeline level display data.

1081

{ # Data provided with a pipeline or transform to provide descriptive info.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1082

"shortStrValue": "A String", # A possible additional shorter value to display.

1083

# For example a java_class_name_value of com.mypackage.MyDoFn

1084

# will be stored with MyDoFn as the short_str_value and

1085

# com.mypackage.MyDoFn as the java_class_name value.

1086

# short_str_value can be displayed and java_class_name_value

1087

# will be displayed as a tooltip.

1088

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1089

"durationValue": "A String", # Contains value if the data is of duration type.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1090

"url": "A String", # An optional full URL.

1091

"floatValue": 3.14, # Contains value if the data is of float type.

1092

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1093

# language namespace (e.g. a Python module) which defines the display data.

1094

# This allows a dax monitoring system to specially handle the data

1095

# and perform custom rendering.

1096

"javaClassValue": "A String", # Contains value if the data is of java class type.

1097

"label": "A String", # An optional label to display in a dax UI for the element.

1098

"boolValue": True or False, # Contains value if the data is of a boolean type.

1099

"strValue": "A String", # Contains value if the data is of string type.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1100

"key": "A String", # The key identifying the display data.

1101

# This is intended to be used as a label for the display data

1102

# when viewed in a dax monitoring system.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1103

"int64Value": "A String", # Contains value if the data is of int64 type.

1104

},

1105

],

1106

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

1107

{ # Description of the composing transforms, names/ids, and input/outputs of a

1108

# stage of execution. Some composing transforms and sources may have been

1109

# generated by the Dataflow service during execution planning.

1110

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

1111

{ # Description of an interstitial value between transforms in an execution

1112

# stage.

1113

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1114

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1115

# source is most closely associated.

1116

"name": "A String", # Dataflow service generated name for this source.

1117

},

1118

],

1119

"kind": "A String", # Type of transform this stage is executing.

1120

"name": "A String", # Dataflow service generated name for this stage.

1121

"outputSource": [ # Output sources for this stage.

1122

{ # Description of an input or output of an execution stage.

1123

"userName": "A String", # Human-readable name for this source; may be user or system generated.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1124

"sizeBytes": "A String", # Size of the source, if measurable.

1125

"name": "A String", # Dataflow service generated name for this source.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1126

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1127

# source is most closely associated.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1128

},

1129

],

1130

"inputSource": [ # Input sources for this stage.

1131

{ # Description of an input or output of an execution stage.

1132

"userName": "A String", # Human-readable name for this source; may be user or system generated.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1133

"sizeBytes": "A String", # Size of the source, if measurable.

1134

"name": "A String", # Dataflow service generated name for this source.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1135

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1136

# source is most closely associated.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1137

},

1138

],

1139

"componentTransform": [ # Transforms that comprise this execution stage.

1140

{ # Description of a transform executed as part of an execution stage.

1141

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1142

"originalTransform": "A String", # User name for the original user transform with which this transform is

1143

# most closely associated.

1144

"name": "A String", # Dataflow service generated name for this transform.

1145

},

1146

],

1147

"id": "A String", # Dataflow service generated id for this stage.

},

],

},

"steps": [ # The top-level steps that constitute the entire job.

1152

{ # Defines a particular step within a Cloud Dataflow job.

1153

#

1154

# A job consists of multiple steps, each of which performs some

1155

# specific operation as part of the overall job. Data is typically

1156

# passed from one step to another as part of the job.

1157

#

1158

# Here's an example of a sequence of steps which together implement a

1159

# Map-Reduce job:

1160

#

1161

# * Read a collection of data from some source, parsing the

1162

# collection's elements.

1163

#

1164

# * Validate the elements.

1165

#

1166

# * Apply a user-defined function to map each element to some value

1167

# and extract an element-specific key value.

1168

#

1169

# * Group elements with the same key into a single element with

1170

# that key, transforming a multiply-keyed collection into a

1171

# uniquely-keyed collection.

1172

#

1173

# * Write the elements out to some data sink.

1174

#

1175

# Note that the Cloud Dataflow service may be used to run many different

1176

# types of jobs, not just Map-Reduce.

1177

"kind": "A String", # The kind of step in the Cloud Dataflow job.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1178

"name": "A String", # The name that identifies the step. This must be unique for each

1179

# step with respect to all other steps in the Cloud Dataflow job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1180

"properties": { # Named properties associated with the step. Each kind of

1181

# predefined step has its own required set of properties.

1182

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

1183

"a_key": "", # Properties of the object.

1184

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1185

},

1186

],

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1187

"location": "A String", # The location that contains this job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1188

"tempFiles": [ # A set of files the system should be aware of that are used

1189

# for temporary storage. These temporary files will be

1190

# removed on job completion.

1191

# No duplicates are allowed.

1192

# No file patterns are supported.

1193

#

1194

# The supported files are:

1195

#

1196

# Google Cloud Storage:

1197

#

1198

# storage.googleapis.com/{bucket}/{object}

1199

# bucket.storage.googleapis.com/{object}

1200

"A String",

1201

],

1202

"type": "A String", # The type of Cloud Dataflow job.

1203

"id": "A String", # The unique ID of this job.

1204

#

1205

# This field is set by the Cloud Dataflow service when the Job is

1206

# created, and is immutable for the life of the job.

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1207

"currentState": "A String", # The current state of the job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1208

#

Sai Cheemalapati

2017-03-24 15:06:46 -0700

[diff] [blame^]

1209

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1210

# specified.

1211

#

1212

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1213

# terminal state. After a job has reached a terminal state, no

1214

# further state updates may be made.

1215

#

1216

# This field may be mutated by the Cloud Dataflow service;

1217

# callers cannot mutate it.

Sai Cheemalapati