Blame - docs/dyn/dataflow_v1b3.projects.templates.html - platform/external/python/google-api-python-client

2016-08-16 12:44:29 -0700

[diff] [blame]

76

<h2>Instance Methods</h2>

77

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

78

<code><a href="#create">create(projectId, body=None, x__xgafv=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

79

<p class="firstline">Creates a Cloud Dataflow job from a template.</p>

80

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

81

<code><a href="#get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

82

<p class="firstline">Get the template associated with a template.</p>

83

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

84

<code><a href="#launch">launch(projectId, body=None, validateOnly=None, gcsPath=None, location=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, x__xgafv=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

85

<p class="firstline">Launch a template.</p>

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

86

<h3>Method Details</h3>

87

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

88

<code class="details" id="create">create(projectId, body=None, x__xgafv=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

89

<pre>Creates a Cloud Dataflow job from a template.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

90

91

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

92

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

93

body: object, The request body.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

94

The object takes the form of:

95

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

96

{ # A request to create a Cloud Dataflow job from a template.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

97

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

98

"workerRegion": "A String", # The Compute Engine region

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

99

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

100

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

101

# with worker_zone. If neither worker_region nor worker_zone is specified,

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

102

# default to the control plane's region.

103

"numWorkers": 42, # The initial number of Google Compute Engine instances for the job.

104

"zone": "A String", # The Compute Engine [availability

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

105

# zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

106

# for launching worker instances to run your pipeline.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

107

# In the future, worker_zone will take precedence.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

108

"workerZone": "A String", # The Compute Engine zone

109

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

110

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

111

# with worker_region. If neither worker_region nor worker_zone is specified,

112

# a zone in the control plane's region is chosen based on available capacity.

113

# If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.

114

"additionalUserLabels": { # Additional user labels to be specified for the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

115

# Keys and values should follow the restrictions specified in the [labeling

116

# restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)

117

# page.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

118

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

119

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

120

"additionalExperiments": [ # Additional experiment flags for the job.

121

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

122

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

123

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

124

# available to your pipeline during execution, from 1 to 1000.

125

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

126

"machineType": "A String", # The machine type to use for the job. Defaults to the value from the

127

# template if not specified.

128

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

129

# the form "regions/REGION/subnetworks/SUBNETWORK".

130

"ipConfiguration": "A String", # Configuration for VM IPs.

131

"kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

132

# Key format is:

133

# projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

134

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

135

# Use with caution.

136

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

137

# Must be a valid Cloud Storage URL, beginning with `gs://`.

138

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

139

# the service will use the network "default".

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

140

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

141

"location": "A String", # The [regional endpoint]

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

142

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

143

# which to direct the request.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

144

"parameters": { # The runtime parameters to pass to the job.

145

"a_key": "A String",

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

146

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

147

"jobName": "A String", # Required. The job name to use for the created job.

148

"gcsPath": "A String", # Required. A Cloud Storage path to the template from which to

149

# create the job.

150

# Must be a valid Cloud Storage URL, beginning with `gs://`.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

151

}

152

153

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

154

Allowed values

155

1 - v1 error format

156

2 - v2 error format

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

157

158

Returns:

159

An object of the form:

160

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

161

{ # Defines a job to be run by the Cloud Dataflow service.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

162

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

163

# If this field is set, the service will ensure its uniqueness.

164

# The request to create a job will fail if the service has knowledge of a

165

# previously submitted job with the same client's ID and job name.

166

# The caller may use this field to ensure idempotence of job

167

# creation across retried attempts to create a job.

168

# By default, the field is empty and, in that case, the service ignores it.

169

"id": "A String", # The unique ID of this job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

170

#

171

# This field is set by the Cloud Dataflow service when the Job is

172

# created, and is immutable for the life of the job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

173

"currentStateTime": "A String", # The timestamp associated with the current state.

174

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

175

# corresponding name prefixes of the new job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

176

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

177

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

178

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

179

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

180

# options are passed through the service and are used to recreate the

181

# SDK pipeline options on the worker in a language agnostic and platform

182

# independent way.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

183

"a_key": "", # Properties of the object.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

184

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

185

"flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.

186

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

187

# specified in order for the job to have workers.

188

{ # Describes one particular pool of Cloud Dataflow workers to be

189

# instantiated by the Cloud Dataflow service in order to perform the

190

# computations required by a job. Note that a workflow job may use

191

# multiple pools, in order to match the various computational

192

# requirements of the various stages of the job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

193

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

194

# select a default set of packages which are useful to worker

195

# harnesses written in a particular language.

196

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

197

# the service will use the network "default".

198

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

199

# will attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

200

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

201

# execute the job. If zero or unspecified, the service will

202

# attempt to choose a reasonable default.

203

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

204

# service will choose a number of threads (according to the number of cores

205

# on the selected machine type for batch, or 1 by convention for streaming).

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

206

"diskSourceImage": "A String", # Fully qualified source image for disks.

207

"packages": [ # Packages to be installed on workers.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

208

{ # The packages that must be installed in order for a worker to run the

209

# steps of the Cloud Dataflow job that will be assigned to its worker

210

# pool.

211

#

212

# This is the mechanism by which the Cloud Dataflow SDK causes code to

213

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

214

# might use this to install jars containing the user's code and all of the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

215

# various dependencies (libraries, data files, etc.) required in order

216

# for that code to run.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

217

"location": "A String", # The resource to read the package from. The supported resource type is:

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

218

#

219

# Google Cloud Storage:

220

#

221

# storage.googleapis.com/{bucket}

222

# bucket.storage.googleapis.com/

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

223

"name": "A String", # The name of the package.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

224

},

225

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

226

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

227

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

228

# `TEARDOWN_NEVER`.

229

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

230

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

231

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

232

# down.

233

#

234

# If the workers are not torn down by the service, they will

235

# continue to run and use Google Compute Engine VM resources in the

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

236

# user's project until they are explicitly terminated by the user.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

237

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

238

# policy except for small, manually supervised test jobs.

239

#

240

# If unknown or unspecified, the service will attempt to choose a reasonable

241

# default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

242

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

243

# Compute Engine API.

244

"poolArgs": { # Extra arguments for this worker pool.

245

"a_key": "", # Properties of the object. Contains field @type with type URL.

246

},

247

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

248

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

249

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

250

# harness, residing in Google Container Registry.

251

#

252

# Deprecated for the Fn API path. Use sdk_harness_container_images instead.

253

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

254

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

255

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

256

# service will attempt to choose a reasonable default.

257

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

258

# are supported.

259

"dataDisks": [ # Data disks that are used by a VM in this workflow.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

260

{ # Describes the data disk used by a workflow job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

261

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

262

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

263

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

264

# must be a disk type appropriate to the project and zone in which

265

# the workers will run. If unknown or unspecified, the service

266

# will attempt to choose a reasonable default.

267

#

268

# For example, the standard persistent disk type is a resource name

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

269

# typically ending in "pd-standard". If SSD persistent disks are

270

# available, the resource name typically ends with "pd-ssd". The

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

271

# actual valid values are defined by the Google Compute Engine API,

272

# not by the Cloud Dataflow API; consult the Google Compute Engine

273

# documentation for more information about determining the set of

274

# available disk types for a particular project and zone.

275

#

276

# Google Compute Engine Disk types are local to a particular

277

# project in a particular zone, and so the resource name will

278

# typically look something like this:

279

#

280

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

281

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

282

},

283

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

284

"sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

285

# only be set in the Fn API path. For non-cross-language pipelines this

286

# should have only one entry. Cross-language pipelines will have two or more

287

# entries.

288

{ # Defines a SDK harness container for executing Dataflow pipelines.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

289

"containerImage": "A String", # A docker container image that resides in Google Container Registry.

290

"useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

291

# container instance with this image. If false (or unset) recommends using

292

# more than one core per SDK container instance with this image for

293

# efficiency. Note that Dataflow service may choose to override this property

294

# if needed.

295

},

296

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

297

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

298

# the form "regions/REGION/subnetworks/SUBNETWORK".

299

"ipConfiguration": "A String", # Configuration for VM IPs.

300

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

301

# using the standard Dataflow task runner. Users should ignore

302

# this field.

303

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

304

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

305

# taskrunner; e.g. "wheel".

306

"harnessCommand": "A String", # The command to launch the worker harness.

307

"logDir": "A String", # The directory on the VM to store logs.

308

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

309

# access the Cloud Dataflow API.

310

"A String",

311

],

312

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

313

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

314

# will not be uploaded.

315

#

316

# The supported resource type is:

317

#

318

# Google Cloud Storage:

319

# storage.googleapis.com/{bucket}/{object}

320

# bucket.storage.googleapis.com/{object}

321

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

322

"workflowFileName": "A String", # The file to store the workflow in.

323

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

324

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

325

# temporary storage.

326

#

327

# The supported resource type is:

328

#

329

# Google Cloud Storage:

330

# storage.googleapis.com/{bucket}/{object}

331

# bucket.storage.googleapis.com/{object}

332

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

333

"languageHint": "A String", # The suggested backend language.

334

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

335

#

336

# When workers access Google Cloud APIs, they logically do so via

337

# relative URLs. If this field is specified, it supplies the base

338

# URL to use for resolving these relative URLs. The normative

339

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

340

# Locators".

341

#

342

# If not specified, the default value is "http://www.googleapis.com/"

343

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

344

# console.

345

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

346

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

347

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

348

#

349

# When workers access Google Cloud APIs, they logically do so via

350

# relative URLs. If this field is specified, it supplies the base

351

# URL to use for resolving these relative URLs. The normative

352

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

353

# Locators".

354

#

355

# If not specified, the default value is "http://www.googleapis.com/"

356

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

357

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

358

# "dataflow/v1b3/projects".

359

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

360

# "shuffle/v1beta1".

361

"workerId": "A String", # The ID of the worker running this pipeline.

362

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

363

# storage.

364

#

365

# The supported resource type is:

366

#

367

# Google Cloud Storage:

368

#

369

# storage.googleapis.com/{bucket}/{object}

370

# bucket.storage.googleapis.com/{object}

371

},

372

"vmId": "A String", # The ID string of the VM.

373

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

374

# taskrunner; e.g. "root".

375

},

376

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

377

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

378

"algorithm": "A String", # The algorithm to use for autoscaling.

379

},

380

"metadata": { # Metadata to set on the Google Compute Engine VMs.

381

"a_key": "A String",

382

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

383

},

384

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

385

"dataset": "A String", # The dataset for the current project where various workflow

386

# related tables are stored.

387

#

388

# The supported resource type is:

389

#

390

# Google BigQuery:

391

# bigquery.googleapis.com/{dataset}

392

"internalExperiments": { # Experimental settings.

393

"a_key": "", # Properties of the object. Contains field @type with type URL.

394

},

395

"workerRegion": "A String", # The Compute Engine region

396

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

397

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

398

# with worker_zone. If neither worker_region nor worker_zone is specified,

399

# default to the control plane's region.

400

"serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data

401

# at rest, AKA a Customer Managed Encryption Key (CMEK).

402

#

403

# Format:

404

# projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY

405

"userAgent": { # A description of the process that generated the request.

406

"a_key": "", # Properties of the object.

407

},

408

"workerZone": "A String", # The Compute Engine zone

409

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

410

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

411

# with worker_region. If neither worker_region nor worker_zone is specified,

412

# a zone in the control plane's region is chosen based on available capacity.

413

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

414

# unspecified, the service will attempt to choose a reasonable

415

# default. This should be in the form of the API service name,

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

416

# e.g. "compute.googleapis.com".

417

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

418

# storage. The system will append the suffix "/temp-{JOBNAME}" to

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

419

# this resource prefix, where {JOBNAME} is the value of the

420

# job_name field. The resulting bucket and object prefix is used

421

# as the prefix of the resources used to store temporary data

422

# needed during the job execution. NOTE: This will override the

423

# value in taskrunner_settings.

424

# The supported resource type is:

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

425

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

426

# Google Cloud Storage:

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

427

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

428

# storage.googleapis.com/{bucket}/{object}

429

# bucket.storage.googleapis.com/{object}

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

430

"experiments": [ # The list of experiments to enable.

431

"A String",

432

],

433

"version": { # A structure describing which components and their versions of the service

434

# are required in order to run the job.

435

"a_key": "", # Properties of the object.

436

},

437

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

438

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

439

"stageStates": [ # This field may be mutated by the Cloud Dataflow service;

440

# callers cannot mutate it.

441

{ # A message describing the state of a particular execution stage.

442

"executionStageName": "A String", # The name of the execution stage.

443

"currentStateTime": "A String", # The time at which the stage transitioned to this state.

444

"executionStageState": "A String", # Execution stage states allow the same set of values as JobState.

445

},

446

],

447

"jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs

448

# by the metadata values provided here. Populated for ListJobs and all GetJob

449

# views SUMMARY and higher.

450

# ListJob response and Job SUMMARY view.

451

"bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.

452

{ # Metadata for a BigTable connector used by the job.

453

"tableId": "A String", # TableId accessed in the connection.

454

"projectId": "A String", # ProjectId accessed in the connection.

455

"instanceId": "A String", # InstanceId accessed in the connection.

456

},

457

],

458

"spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.

459

{ # Metadata for a Spanner connector used by the job.

460

"databaseId": "A String", # DatabaseId accessed in the connection.

461

"instanceId": "A String", # InstanceId accessed in the connection.

462

"projectId": "A String", # ProjectId accessed in the connection.

463

},

464

],

465

"datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.

466

{ # Metadata for a Datastore connector used by the job.

467

"projectId": "A String", # ProjectId accessed in the connection.

468

"namespace": "A String", # Namespace used in the connection.

469

},

470

],

471

"sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.

472

"versionDisplayName": "A String", # A readable string describing the version of the SDK.

473

"sdkSupportStatus": "A String", # The support status for this SDK version.

474

"version": "A String", # The version of the SDK used to run the job.

475

},

476

"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.

477

{ # Metadata for a BigQuery connector used by the job.

478

"table": "A String", # Table accessed in the connection.

479

"dataset": "A String", # Dataset accessed in the connection.

480

"projectId": "A String", # Project accessed in the connection.

481

"query": "A String", # Query used to access data in the connection.

482

},

483

],

484

"fileDetails": [ # Identification of a File source used in the Dataflow job.

485

{ # Metadata for a File connector used by the job.

486

"filePattern": "A String", # File Pattern used to access files by the connector.

487

},

488

],

489

"pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.

490

{ # Metadata for a PubSub connector used by the job.

491

"subscription": "A String", # Subscription used in the connection.

492

"topic": "A String", # Topic accessed in the connection.

},

],

},

"createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given

497

# snapshot.

498

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

499

"type": "A String", # The type of Cloud Dataflow job.

500

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

501

# A description of the user pipeline and stages through which it is executed.

502

# Created by Cloud Dataflow service. Only retrieved with

503

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

504

# form. This data is provided by the Dataflow service for ease of visualizing

505

# the pipeline and interpreting Dataflow provided metrics.

506

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

507

{ # Description of the composing transforms, names/ids, and input/outputs of a

508

# stage of execution. Some composing transforms and sources may have been

509

# generated by the Dataflow service during execution planning.

510

"id": "A String", # Dataflow service generated id for this stage.

511

"componentTransform": [ # Transforms that comprise this execution stage.

512

{ # Description of a transform executed as part of an execution stage.

513

"originalTransform": "A String", # User name for the original user transform with which this transform is

514

# most closely associated.

515

"name": "A String", # Dataflow service generated name for this source.

516

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

517

},

518

],

519

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

520

{ # Description of an interstitial value between transforms in an execution

521

# stage.

522

"name": "A String", # Dataflow service generated name for this source.

523

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

524

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

525

# source is most closely associated.

526

},

527

],

528

"kind": "A String", # Type of transform this stage is executing.

529

"outputSource": [ # Output sources for this stage.

530

{ # Description of an input or output of an execution stage.

531

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

532

# source is most closely associated.

533

"name": "A String", # Dataflow service generated name for this source.

534

"sizeBytes": "A String", # Size of the source, if measurable.

535

"userName": "A String", # Human-readable name for this source; may be user or system generated.

536

},

537

],

538

"name": "A String", # Dataflow service generated name for this stage.

539

"inputSource": [ # Input sources for this stage.

540

{ # Description of an input or output of an execution stage.

541

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

542

# source is most closely associated.

543

"name": "A String", # Dataflow service generated name for this source.

544

"sizeBytes": "A String", # Size of the source, if measurable.

545

"userName": "A String", # Human-readable name for this source; may be user or system generated.

},

],

},

],

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

551

{ # Description of the type, names/ids, and input/outputs for a transform.

552

"kind": "A String", # Type of transform.

553

"inputCollectionName": [ # User names for all collection inputs to this transform.

554

"A String",

555

],

556

"name": "A String", # User provided name for this transform instance.

557

"id": "A String", # SDK generated id of this transform instance.

558

"displayData": [ # Transform-specific display data.

559

{ # Data provided with a pipeline or transform to provide descriptive info.

560

"timestampValue": "A String", # Contains value if the data is of timestamp type.

561

"boolValue": True or False, # Contains value if the data is of a boolean type.

562

"javaClassValue": "A String", # Contains value if the data is of java class type.

563

"strValue": "A String", # Contains value if the data is of string type.

564

"int64Value": "A String", # Contains value if the data is of int64 type.

565

"durationValue": "A String", # Contains value if the data is of duration type.

566

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

567

# language namespace (i.e. python module) which defines the display data.

568

# This allows a dax monitoring system to specially handle the data

569

# and perform custom rendering.

570

"floatValue": 3.14, # Contains value if the data is of float type.

571

"key": "A String", # The key identifying the display data.

572

# This is intended to be used as a label for the display data

573

# when viewed in a dax monitoring system.

574

"shortStrValue": "A String", # A possible additional shorter value to display.

575

# For example a java_class_name_value of com.mypackage.MyDoFn

576

# will be stored with MyDoFn as the short_str_value and

577

# com.mypackage.MyDoFn as the java_class_name value.

578

# short_str_value can be displayed and java_class_name_value

579

# will be displayed as a tooltip.

580

"url": "A String", # An optional full URL.

581

"label": "A String", # An optional label to display in a dax UI for the element.

582

},

583

],

584

"outputCollectionName": [ # User names for all collection outputs to this transform.

"A String",

],

},

],

"displayData": [ # Pipeline level display data.

590

{ # Data provided with a pipeline or transform to provide descriptive info.

591

"timestampValue": "A String", # Contains value if the data is of timestamp type.

592

"boolValue": True or False, # Contains value if the data is of a boolean type.

593

"javaClassValue": "A String", # Contains value if the data is of java class type.

594

"strValue": "A String", # Contains value if the data is of string type.

595

"int64Value": "A String", # Contains value if the data is of int64 type.

596

"durationValue": "A String", # Contains value if the data is of duration type.

597

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

598

# language namespace (i.e. python module) which defines the display data.

599

# This allows a dax monitoring system to specially handle the data

600

# and perform custom rendering.

601

"floatValue": 3.14, # Contains value if the data is of float type.

602

"key": "A String", # The key identifying the display data.

603

# This is intended to be used as a label for the display data

604

# when viewed in a dax monitoring system.

605

"shortStrValue": "A String", # A possible additional shorter value to display.

606

# For example a java_class_name_value of com.mypackage.MyDoFn

607

# will be stored with MyDoFn as the short_str_value and

608

# com.mypackage.MyDoFn as the java_class_name value.

609

# short_str_value can be displayed and java_class_name_value

610

# will be displayed as a tooltip.

611

"url": "A String", # An optional full URL.

612

"label": "A String", # An optional label to display in a dax UI for the element.

},

],

},

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

617

# of the job it replaced.

618

#

619

# When sending a `CreateJobRequest`, you can update a job by specifying it

620

# here. The job named here is stopped, and its intermediate state is

621

# transferred to this job.

622

"tempFiles": [ # A set of files the system should be aware of that are used

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

623

# for temporary storage. These temporary files will be

624

# removed on job completion.

625

# No duplicates are allowed.

626

# No file patterns are supported.

627

#

628

# The supported files are:

629

#

630

# Google Cloud Storage:

631

#

632

# storage.googleapis.com/{bucket}/{object}

633

# bucket.storage.googleapis.com/{object}

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

634

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

635

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

636

"name": "A String", # The user-specified Cloud Dataflow job name.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

637

#

638

# Only one Job with a given name may exist in a project at any

639

# given time. If a caller attempts to create a Job with the same

640

# name as an already-existing Job, the attempt returns the

641

# existing Job.

642

#

643

# The name must match the regular expression

644

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

645

"steps": [ # Exactly one of step or steps_location should be specified.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

646

#

647

# The top-level steps that constitute the entire job.

648

{ # Defines a particular step within a Cloud Dataflow job.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

649

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

650

# A job consists of multiple steps, each of which performs some

651

# specific operation as part of the overall job. Data is typically

652

# passed from one step to another as part of the job.

653

#

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

654

# Here's an example of a sequence of steps which together implement a

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

655

# Map-Reduce job:

656

#

657

# * Read a collection of data from some source, parsing the

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

658

# collection's elements.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

659

#

660

# * Validate the elements.

661

#

662

# * Apply a user-defined function to map each element to some value

663

# and extract an element-specific key value.

664

#

665

# * Group elements with the same key into a single element with

666

# that key, transforming a multiply-keyed collection into a

667

# uniquely-keyed collection.

668

#

669

# * Write the elements out to some data sink.

670

#

671

# Note that the Cloud Dataflow service may be used to run many different

672

# types of jobs, not just Map-Reduce.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

673

"name": "A String", # The name that identifies the step. This must be unique for each

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

674

# step with respect to all other steps in the Cloud Dataflow job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

675

"kind": "A String", # The kind of step in the Cloud Dataflow job.

676

"properties": { # Named properties associated with the step. Each kind of

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

677

# predefined step has its own required set of properties.

678

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

679

"a_key": "", # Properties of the object.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

680

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

681

},

682

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

683

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

684

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

685

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

686

# isn't contained in the submitted job.

687

"stages": { # A mapping from each stage to the information about that stage.

688

"a_key": { # Contains information about how a particular

689

# google.dataflow.v1beta3.Step will be executed.

690

"stepName": [ # The steps associated with the execution stage.

691

# Note that stages may have several steps, and that a given step

692

# might be run by more than one stage.

"A String",

],

},

},

},

"currentState": "A String", # The current state of the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

699

#

700

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

701

# specified.

702

#

703

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

704

# terminal state. After a job has reached a terminal state, no

705

# further state updates may be made.

706

#

707

# This field may be mutated by the Cloud Dataflow service;

708

# callers cannot mutate it.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

709

"location": "A String", # The [regional endpoint]

710

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that

711

# contains this job.

712

"startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).

713

# Flexible resource scheduling jobs are started with some delay after job

714

# creation, so start_time is unset before start and is updated when the

715

# job is started by the Cloud Dataflow service. For other jobs, start_time

716

# always equals create_time and is immutable and set by the Cloud Dataflow

717

# service.

718

"stepsLocation": "A String", # The GCS location where the steps are stored.

719

"labels": { # User-defined labels for this job.

720

#

721

# The labels map can contain no more than 64 entries. Entries of the labels

722

# map are UTF8 strings that comply with the following restrictions:

723

#

724

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

725

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

726

# * Both keys and values are additionally constrained to be <= 128 bytes in

727

# size.

728

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

729

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

730

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

731

# Cloud Dataflow service.

732

"requestedState": "A String", # The job's requested state.

733

#

734

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

735

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

736

# also be used to directly set a job's requested state to

737

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

738

# job if it has not already reached a terminal state.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

}</pre>

</div>

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

743

<code class="details" id="get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

744

<pre>Get the template associated with a template.

745

746

Args:

747

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

748

view: string, The view to retrieve. Defaults to METADATA_ONLY.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

749

gcsPath: string, Required. A Cloud Storage path to the template from which to

750

create the job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

751

Must be a valid Cloud Storage URL, beginning with 'gs://'.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

752

location: string, The [regional endpoint]

753

(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

754

which to direct the request.

755

x__xgafv: string, V1 error format.

756

Allowed values

757

1 - v1 error format

758

2 - v2 error format

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

759

760

Returns:

761

An object of the form:

762

763

{ # The response to a GetTemplate request.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

764

"status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

765

# request will be indicated in the error_details.

766

# different programming environments, including REST APIs and RPC APIs. It is

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

767

# used by [gRPC](https://github.com/grpc). Each `Status` message contains

768

# three pieces of data: error code, error message, and error details.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

769

#

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

770

# You can find out more about this error model and how to work with it in the

771

# [API Design Guide](https://cloud.google.com/apis/design/errors).

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

772

"message": "A String", # A developer-facing error message, which should be in English. Any

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

773

# user-facing error message should be localized and sent in the

774

# google.rpc.Status.details field, or localized by the client.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

775

"details": [ # A list of messages that carry the error details. There is a common set of

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

776

# message types for APIs to use.

777

{

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

778

"a_key": "", # Properties of the object. Contains field @type with type URL.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

779

},

780

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

781

"code": 42, # The status code, which should be an enum value of google.rpc.Code.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

782

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

783

"templateType": "A String", # Template Type.

784

"metadata": { # Metadata describing a template. # The template metadata describing the template name, available

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

785

# parameters, etc.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

786

"name": "A String", # Required. The name of the template.

787

"parameters": [ # The parameters for the template.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

788

{ # Metadata for a specific parameter.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

789

"label": "A String", # Required. The label to display for the parameter.

790

"paramType": "A String", # Optional. The type of the parameter.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

791

# Used for selecting input picker.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

792

"helpText": "A String", # Required. The help text to display for the parameter.

793

"name": "A String", # Required. The name of the parameter.

794

"regexes": [ # Optional. Regexes that the parameter must match.

795

"A String",

796

],

797

"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

798

},

799

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

800

"description": "A String", # Optional. A description of the template.

801

},

802

"runtimeMetadata": { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters.

803

"sdkInfo": { # SDK Information. # SDK Info for the template.

804

"language": "A String", # Required. The SDK Language.

805

"version": "A String", # Optional. The SDK version.

806

},

807

"parameters": [ # The parameters for the template.

808

{ # Metadata for a specific parameter.

809

"label": "A String", # Required. The label to display for the parameter.

810

"paramType": "A String", # Optional. The type of the parameter.

811

# Used for selecting input picker.

812

"helpText": "A String", # Required. The help text to display for the parameter.

813

"name": "A String", # Required. The name of the parameter.

814

"regexes": [ # Optional. Regexes that the parameter must match.

815

"A String",

816

],

817

"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.

818

},

819

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

},

}</pre>

</div>

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

825

<code class="details" id="launch">launch(projectId, body=None, validateOnly=None, gcsPath=None, location=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, x__xgafv=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

826

<pre>Launch a template.

827

828

Args:

829

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

830

body: object, The request body.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

831

The object takes the form of:

832

833

{ # Parameters to provide to the template being launched.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

834

"transformNameMapping": { # Only applicable when updating a pipeline. Map of transform name prefixes of

835

# the job to be replaced to the corresponding name prefixes of the new job.

836

"a_key": "A String",

837

},

838

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

839

"workerRegion": "A String", # The Compute Engine region

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

840

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

841

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

842

# with worker_zone. If neither worker_region nor worker_zone is specified,

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

843

# default to the control plane's region.

844

"numWorkers": 42, # The initial number of Google Compute Engine instances for the job.

845

"zone": "A String", # The Compute Engine [availability

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

846

# zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

847

# for launching worker instances to run your pipeline.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

848

# In the future, worker_zone will take precedence.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

849

"workerZone": "A String", # The Compute Engine zone

850

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

851

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

852

# with worker_region. If neither worker_region nor worker_zone is specified,

853

# a zone in the control plane's region is chosen based on available capacity.

854

# If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.

855

"additionalUserLabels": { # Additional user labels to be specified for the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

856

# Keys and values should follow the restrictions specified in the [labeling

857

# restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)

858

# page.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

859

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

860

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

861

"additionalExperiments": [ # Additional experiment flags for the job.

862

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

863

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

864

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

865

# available to your pipeline during execution, from 1 to 1000.

866

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

867

"machineType": "A String", # The machine type to use for the job. Defaults to the value from the

868

# template if not specified.

869

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

870

# the form "regions/REGION/subnetworks/SUBNETWORK".

871

"ipConfiguration": "A String", # Configuration for VM IPs.

872

"kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

873

# Key format is:

874

# projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

875

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

876

# Use with caution.

877

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

878

# Must be a valid Cloud Storage URL, beginning with `gs://`.

879

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

880

# the service will use the network "default".

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

881

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

882

"update": True or False, # If set, replace the existing pipeline with the name specified by jobName

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

883

# with this pipeline, preserving state.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

884

"parameters": { # The runtime parameters to pass to the job.

885

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

886

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

887

"jobName": "A String", # Required. The job name to use for the created job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

888

}

889

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

890

validateOnly: boolean, If true, the request is validated but not actually executed.

891

Defaults to false.

892

gcsPath: string, A Cloud Storage path to the template from which to create

893

the job.

894

Must be a valid Cloud Storage URL, beginning with 'gs://'.

895

location: string, The [regional endpoint]

896

(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

897

which to direct the request.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

898

dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.

899

The file must be a JSON-serialized DynamicTemplateFileSpec object.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

900

dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.

901

Must be a valid Cloud Storage URL, beginning with `gs://`.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

902

x__xgafv: string, V1 error format.

903

Allowed values

904

1 - v1 error format

905

2 - v2 error format

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

906

907

Returns:

908

An object of the form:

909

910

{ # Response to the request to launch a template.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

911

"job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

912

# the job was successfully launched.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

913

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

914

# If this field is set, the service will ensure its uniqueness.

915

# The request to create a job will fail if the service has knowledge of a

916

# previously submitted job with the same client's ID and job name.

917

# The caller may use this field to ensure idempotence of job

918

# creation across retried attempts to create a job.

919

# By default, the field is empty and, in that case, the service ignores it.

920

"id": "A String", # The unique ID of this job.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

921

#

922

# This field is set by the Cloud Dataflow service when the Job is

923

# created, and is immutable for the life of the job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

924

"currentStateTime": "A String", # The timestamp associated with the current state.

925

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

926

# corresponding name prefixes of the new job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

927

"a_key": "A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

928

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

929

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

930

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

931

# options are passed through the service and are used to recreate the

932

# SDK pipeline options on the worker in a language agnostic and platform

933

# independent way.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

934

"a_key": "", # Properties of the object.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

935

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

936

"flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.

937

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

938

# specified in order for the job to have workers.

939

{ # Describes one particular pool of Cloud Dataflow workers to be

940

# instantiated by the Cloud Dataflow service in order to perform the

941

# computations required by a job. Note that a workflow job may use

942

# multiple pools, in order to match the various computational

943

# requirements of the various stages of the job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

944

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

945

# select a default set of packages which are useful to worker

946

# harnesses written in a particular language.

947

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

948

# the service will use the network "default".

949

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

950

# will attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

951

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

952

# execute the job. If zero or unspecified, the service will

953

# attempt to choose a reasonable default.

954

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

955

# service will choose a number of threads (according to the number of cores

956

# on the selected machine type for batch, or 1 by convention for streaming).

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

957

"diskSourceImage": "A String", # Fully qualified source image for disks.

958

"packages": [ # Packages to be installed on workers.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

959

{ # The packages that must be installed in order for a worker to run the

960

# steps of the Cloud Dataflow job that will be assigned to its worker

961

# pool.

962

#

963

# This is the mechanism by which the Cloud Dataflow SDK causes code to

964

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

965

# might use this to install jars containing the user's code and all of the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

966

# various dependencies (libraries, data files, etc.) required in order

967

# for that code to run.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

968

"location": "A String", # The resource to read the package from. The supported resource type is:

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

969

#

970

# Google Cloud Storage:

971

#

972

# storage.googleapis.com/{bucket}

973

# bucket.storage.googleapis.com/

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

974

"name": "A String", # The name of the package.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

975

},

976

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

977

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

978

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

979

# `TEARDOWN_NEVER`.

980

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

981

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

982

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

983

# down.

984

#

985

# If the workers are not torn down by the service, they will

986

# continue to run and use Google Compute Engine VM resources in the

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

987

# user's project until they are explicitly terminated by the user.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

988

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

989

# policy except for small, manually supervised test jobs.

990

#

991

# If unknown or unspecified, the service will attempt to choose a reasonable

992

# default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

993

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

994

# Compute Engine API.

995

"poolArgs": { # Extra arguments for this worker pool.

996

"a_key": "", # Properties of the object. Contains field @type with type URL.

997

},

998

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

999

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1000

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1001

# harness, residing in Google Container Registry.

1002

#

1003

# Deprecated for the Fn API path. Use sdk_harness_container_images instead.

1004

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1005

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1006

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1007

# service will attempt to choose a reasonable default.

1008

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1009

# are supported.

1010

"dataDisks": [ # Data disks that are used by a VM in this workflow.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1011

{ # Describes the data disk used by a workflow job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1012

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1013

# attempt to choose a reasonable default.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1014

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1015

# must be a disk type appropriate to the project and zone in which

1016

# the workers will run. If unknown or unspecified, the service

1017

# will attempt to choose a reasonable default.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1018

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1019

# For example, the standard persistent disk type is a resource name

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1020

# typically ending in "pd-standard". If SSD persistent disks are

1021

# available, the resource name typically ends with "pd-ssd". The

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1022

# actual valid values are defined by the Google Compute Engine API,

1023

# not by the Cloud Dataflow API; consult the Google Compute Engine

1024

# documentation for more information about determining the set of

1025

# available disk types for a particular project and zone.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1026

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1027

# Google Compute Engine Disk types are local to a particular

1028

# project in a particular zone, and so the resource name will

1029

# typically look something like this:

1030

#

1031

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1032

"mountPoint": "A String", # Directory in a VM where disk is mounted.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1033

},

1034

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1035

"sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1036

# only be set in the Fn API path. For non-cross-language pipelines this

1037

# should have only one entry. Cross-language pipelines will have two or more

1038

# entries.

1039

{ # Defines a SDK harness container for executing Dataflow pipelines.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1040

"containerImage": "A String", # A docker container image that resides in Google Container Registry.

1041

"useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1042

# container instance with this image. If false (or unset) recommends using

1043

# more than one core per SDK container instance with this image for

1044

# efficiency. Note that Dataflow service may choose to override this property

1045

# if needed.

1046

},

1047

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1048

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1049

# the form "regions/REGION/subnetworks/SUBNETWORK".

1050

"ipConfiguration": "A String", # Configuration for VM IPs.

1051

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1052

# using the standard Dataflow task runner. Users should ignore

1053

# this field.

1054

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1055

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1056

# taskrunner; e.g. "wheel".

1057

"harnessCommand": "A String", # The command to launch the worker harness.

1058

"logDir": "A String", # The directory on the VM to store logs.

1059

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1060

# access the Cloud Dataflow API.

1061

"A String",

1062

],

1063

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

1064

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1065

# will not be uploaded.

1066

#

1067

# The supported resource type is:

1068

#

1069

# Google Cloud Storage:

1070

# storage.googleapis.com/{bucket}/{object}

1071

# bucket.storage.googleapis.com/{object}

1072

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1073

"workflowFileName": "A String", # The file to store the workflow in.

1074

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1075

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1076

# temporary storage.

1077

#

1078

# The supported resource type is:

1079

#

1080

# Google Cloud Storage:

1081

# storage.googleapis.com/{bucket}/{object}

1082

# bucket.storage.googleapis.com/{object}

1083

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

1084

"languageHint": "A String", # The suggested backend language.

1085

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1086

#

1087

# When workers access Google Cloud APIs, they logically do so via

1088

# relative URLs. If this field is specified, it supplies the base

1089

# URL to use for resolving these relative URLs. The normative

1090

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1091

# Locators".

1092

#

1093

# If not specified, the default value is "http://www.googleapis.com/"

1094

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1095

# console.

1096

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1097

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1098

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1099

#

1100

# When workers access Google Cloud APIs, they logically do so via

1101

# relative URLs. If this field is specified, it supplies the base

1102

# URL to use for resolving these relative URLs. The normative

1103

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1104

# Locators".

1105

#

1106

# If not specified, the default value is "http://www.googleapis.com/"

1107

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1108

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1109

# "dataflow/v1b3/projects".

1110

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1111

# "shuffle/v1beta1".

1112

"workerId": "A String", # The ID of the worker running this pipeline.

1113

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1114

# storage.

1115

#

1116

# The supported resource type is:

1117

#

1118

# Google Cloud Storage:

1119

#

1120

# storage.googleapis.com/{bucket}/{object}

1121

# bucket.storage.googleapis.com/{object}

1122

},

1123

"vmId": "A String", # The ID string of the VM.

1124

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1125

# taskrunner; e.g. "root".

1126

},

1127

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

1128

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

1129

"algorithm": "A String", # The algorithm to use for autoscaling.

1130

},

1131

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1132

"a_key": "A String",

1133

},

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1134

},

1135

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1136

"dataset": "A String", # The dataset for the current project where various workflow

1137

# related tables are stored.

1138

#

1139

# The supported resource type is:

1140

#

1141

# Google BigQuery:

1142

# bigquery.googleapis.com/{dataset}

1143

"internalExperiments": { # Experimental settings.

1144

"a_key": "", # Properties of the object. Contains field @type with type URL.

1145

},

1146

"workerRegion": "A String", # The Compute Engine region

1147

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

1148

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

1149

# with worker_zone. If neither worker_region nor worker_zone is specified,

1150

# default to the control plane's region.

1151

"serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data

1152

# at rest, AKA a Customer Managed Encryption Key (CMEK).

1153

#

1154

# Format:

1155

# projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY

1156

"userAgent": { # A description of the process that generated the request.

1157

"a_key": "", # Properties of the object.

1158

},

1159

"workerZone": "A String", # The Compute Engine zone

1160

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

1161

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

1162

# with worker_region. If neither worker_region nor worker_zone is specified,

1163

# a zone in the control plane's region is chosen based on available capacity.

1164

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1165

# unspecified, the service will attempt to choose a reasonable

1166

# default. This should be in the form of the API service name,

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1167

# e.g. "compute.googleapis.com".

1168

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1169

# storage. The system will append the suffix "/temp-{JOBNAME}" to

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1170

# this resource prefix, where {JOBNAME} is the value of the

1171

# job_name field. The resulting bucket and object prefix is used

1172

# as the prefix of the resources used to store temporary data

1173

# needed during the job execution. NOTE: This will override the

1174

# value in taskrunner_settings.

1175

# The supported resource type is:

1176

#

1177

# Google Cloud Storage:

1178

#

1179

# storage.googleapis.com/{bucket}/{object}

1180

# bucket.storage.googleapis.com/{object}

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1181

"experiments": [ # The list of experiments to enable.

1182

"A String",

1183

],

1184

"version": { # A structure describing which components and their versions of the service

1185

# are required in order to run the job.

1186

"a_key": "", # Properties of the object.

1187

},

1188

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1189

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1190

"stageStates": [ # This field may be mutated by the Cloud Dataflow service;

1191

# callers cannot mutate it.

1192

{ # A message describing the state of a particular execution stage.

1193

"executionStageName": "A String", # The name of the execution stage.

1194

"currentStateTime": "A String", # The time at which the stage transitioned to this state.

1195

"executionStageState": "A String", # Execution stage states allow the same set of values as JobState.

1196

},

1197

],

1198

"jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs

1199

# by the metadata values provided here. Populated for ListJobs and all GetJob

1200

# views SUMMARY and higher.

1201

# ListJob response and Job SUMMARY view.

1202

"bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.

1203

{ # Metadata for a BigTable connector used by the job.

1204

"tableId": "A String", # TableId accessed in the connection.

1205

"projectId": "A String", # ProjectId accessed in the connection.

1206

"instanceId": "A String", # InstanceId accessed in the connection.

1207

},

1208

],

1209

"spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.

1210

{ # Metadata for a Spanner connector used by the job.

1211

"databaseId": "A String", # DatabaseId accessed in the connection.

1212

"instanceId": "A String", # InstanceId accessed in the connection.

1213

"projectId": "A String", # ProjectId accessed in the connection.

1214

},

1215

],

1216

"datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.

1217

{ # Metadata for a Datastore connector used by the job.

1218

"projectId": "A String", # ProjectId accessed in the connection.

1219

"namespace": "A String", # Namespace used in the connection.

1220

},

1221

],

1222

"sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.

1223

"versionDisplayName": "A String", # A readable string describing the version of the SDK.

1224

"sdkSupportStatus": "A String", # The support status for this SDK version.

1225

"version": "A String", # The version of the SDK used to run the job.

1226

},

1227

"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.

1228

{ # Metadata for a BigQuery connector used by the job.

1229

"table": "A String", # Table accessed in the connection.

1230

"dataset": "A String", # Dataset accessed in the connection.

1231

"projectId": "A String", # Project accessed in the connection.

1232

"query": "A String", # Query used to access data in the connection.

1233

},

1234

],

1235

"fileDetails": [ # Identification of a File source used in the Dataflow job.

1236

{ # Metadata for a File connector used by the job.

1237

"filePattern": "A String", # File Pattern used to access files by the connector.

1238

},

1239

],

1240

"pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.

1241

{ # Metadata for a PubSub connector used by the job.

1242

"subscription": "A String", # Subscription used in the connection.

1243

"topic": "A String", # Topic accessed in the connection.

},

],

},

"createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given

1248

# snapshot.

1249

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1250

"type": "A String", # The type of Cloud Dataflow job.

1251

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

1252

# A description of the user pipeline and stages through which it is executed.

1253

# Created by Cloud Dataflow service. Only retrieved with

1254

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

1255

# form. This data is provided by the Dataflow service for ease of visualizing

1256

# the pipeline and interpreting Dataflow provided metrics.

1257

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

1258

{ # Description of the composing transforms, names/ids, and input/outputs of a

1259

# stage of execution. Some composing transforms and sources may have been

1260

# generated by the Dataflow service during execution planning.

1261

"id": "A String", # Dataflow service generated id for this stage.

1262

"componentTransform": [ # Transforms that comprise this execution stage.

1263

{ # Description of a transform executed as part of an execution stage.

1264

"originalTransform": "A String", # User name for the original user transform with which this transform is

1265

# most closely associated.

1266

"name": "A String", # Dataflow service generated name for this transform.

1267

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1268

},

1269

],

1270

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

1271

{ # Description of an interstitial value between transforms in an execution

1272

# stage.

1273

"name": "A String", # Dataflow service generated name for this source.

1274

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1275

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1276

# source is most closely associated.

1277

},

1278

],

1279

"kind": "A String", # Type of transform this stage is executing.

1280

"outputSource": [ # Output sources for this stage.

1281

{ # Description of an input or output of an execution stage.

1282

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1283

# source is most closely associated.

1284

"name": "A String", # Dataflow service generated name for this source.

1285

"sizeBytes": "A String", # Size of the source, if measurable.

1286

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1287

},

1288

],

1289

"name": "A String", # Dataflow service generated name for this stage.

1290

"inputSource": [ # Input sources for this stage.

1291

{ # Description of an input or output of an execution stage.

1292

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1293

# source is most closely associated.

1294

"name": "A String", # Dataflow service generated name for this source.

1295

"sizeBytes": "A String", # Size of the source, if measurable.

1296

"userName": "A String", # Human-readable name for this source; may be user or system generated.

},

],

},

],

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

1302

{ # Description of the type, names/ids, and input/outputs for a transform.

1303

"kind": "A String", # Type of transform.

1304

"inputCollectionName": [ # User names for all collection inputs to this transform.

1305

"A String",

1306

],

1307

"name": "A String", # User provided name for this transform instance.

1308

"id": "A String", # SDK generated id of this transform instance.

1309

"displayData": [ # Transform-specific display data.

1310

{ # Data provided with a pipeline or transform to provide descriptive info.

1311

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1312

"boolValue": True or False, # Contains value if the data is of a boolean type.

1313

"javaClassValue": "A String", # Contains value if the data is of java class type.

1314

"strValue": "A String", # Contains value if the data is of string type.

1315

"int64Value": "A String", # Contains value if the data is of int64 type.

1316

"durationValue": "A String", # Contains value if the data is of duration type.

1317

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1318

# language namespace (e.g. a Python module) which defines the display data.

1319

# This allows a dax monitoring system to specially handle the data

1320

# and perform custom rendering.

1321

"floatValue": 3.14, # Contains value if the data is of float type.

1322

"key": "A String", # The key identifying the display data.

1323

# This is intended to be used as a label for the display data

1324

# when viewed in a dax monitoring system.

1325

"shortStrValue": "A String", # A possible additional shorter value to display.

1326

# For example a java_class_name_value of com.mypackage.MyDoFn

1327

# will be stored with MyDoFn as the short_str_value and

1328

# com.mypackage.MyDoFn as the java_class_name_value.

1329

# short_str_value can be displayed and java_class_name_value

1330

# will be displayed as a tooltip.

1331

"url": "A String", # An optional full URL.

1332

"label": "A String", # An optional label to display in a dax UI for the element.

1333

},

1334

],

1335

"outputCollectionName": [ # User names for all collection outputs to this transform.

"A String",

],

},

],

"displayData": [ # Pipeline level display data.

1341

{ # Data provided with a pipeline or transform to provide descriptive info.

1342

"timestampValue": "A String", # Contains value if the data is of timestamp type.

1343

"boolValue": True or False, # Contains value if the data is of a boolean type.

1344

"javaClassValue": "A String", # Contains value if the data is of java class type.

1345

"strValue": "A String", # Contains value if the data is of string type.

1346

"int64Value": "A String", # Contains value if the data is of int64 type.

1347

"durationValue": "A String", # Contains value if the data is of duration type.

1348

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1349

# language namespace (e.g. a Python module) which defines the display data.

1350

# This allows a dax monitoring system to specially handle the data

1351

# and perform custom rendering.

1352

"floatValue": 3.14, # Contains value if the data is of float type.

1353

"key": "A String", # The key identifying the display data.

1354

# This is intended to be used as a label for the display data

1355

# when viewed in a dax monitoring system.

1356

"shortStrValue": "A String", # A possible additional shorter value to display.

1357

# For example a java_class_name_value of com.mypackage.MyDoFn

1358

# will be stored with MyDoFn as the short_str_value and

1359

# com.mypackage.MyDoFn as the java_class_name_value.

1360

# short_str_value can be displayed and java_class_name_value

1361

# will be displayed as a tooltip.

1362

"url": "A String", # An optional full URL.

1363

"label": "A String", # An optional label to display in a dax UI for the element.

},

],

},

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1368

# of the job it replaced.

1369

#

1370

# When sending a `CreateJobRequest`, you can update a job by specifying it

1371

# here. The job named here is stopped, and its intermediate state is

1372

# transferred to this job.

1373

"tempFiles": [ # A set of files the system should be aware of that are used

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1374

# for temporary storage. These temporary files will be

1375

# removed on job completion.

1376

# No duplicates are allowed.

1377

# No file patterns are supported.

1378

#

1379

# The supported files are:

1380

#

1381

# Google Cloud Storage:

1382

#

1383

# storage.googleapis.com/{bucket}/{object}

1384

# bucket.storage.googleapis.com/{object}

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1385

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1386

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1387

"name": "A String", # The user-specified Cloud Dataflow job name.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1388

#

1389

# Only one Job with a given name may exist in a project at any

1390

# given time. If a caller attempts to create a Job with the same

1391

# name as an already-existing Job, the attempt returns the

1392

# existing Job.

1393

#

1394

# The name must match the regular expression

1395

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1396

"steps": [ # Exactly one of step or steps_location should be specified.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1397

#

1398

# The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1399

{ # Defines a particular step within a Cloud Dataflow job.

1400

#

1401

# A job consists of multiple steps, each of which performs some

1402

# specific operation as part of the overall job. Data is typically

1403

# passed from one step to another as part of the job.

1404

#

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1405

# Here's an example of a sequence of steps which together implement a

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1406

# Map-Reduce job:

1407

#

1408

# * Read a collection of data from some source, parsing the

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1409

# collection's elements.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1410

#

1411

# * Validate the elements.

1412

#

1413

# * Apply a user-defined function to map each element to some value

1414

# and extract an element-specific key value.

1415

#

1416

# * Group elements with the same key into a single element with

1417

# that key, transforming a multiply-keyed collection into a

1418

# uniquely-keyed collection.

1419

#

1420

# * Write the elements out to some data sink.

1421

#

1422

# Note that the Cloud Dataflow service may be used to run many different

1423

# types of jobs, not just Map-Reduce.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1424

"name": "A String", # The name that identifies the step. This must be unique for each

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1425

# step with respect to all other steps in the Cloud Dataflow job.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1426

"kind": "A String", # The kind of step in the Cloud Dataflow job.

1427

"properties": { # Named properties associated with the step. Each kind of

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1428

# predefined step has its own required set of properties.

1429

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1430

"a_key": "", # Properties of the object.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1431

},

1432

},

1433

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1434

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1435

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1436

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1437

# isn't contained in the submitted job.

1438

"stages": { # A mapping from each stage to the information about that stage.

1439

"a_key": { # Contains information about how a particular

1440

# google.dataflow.v1beta3.Step will be executed.

1441

"stepName": [ # The steps associated with the execution stage.

1442

# Note that stages may have several steps, and that a given step

1443

# might be run by more than one stage.

"A String",

],

},

},

},

"currentState": "A String", # The current state of the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1450

#

1451

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1452

# specified.

1453

#

1454

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1455

# terminal state. After a job has reached a terminal state, no

1456

# further state updates may be made.

1457

#

1458

# This field may be mutated by the Cloud Dataflow service;

1459

# callers cannot mutate it.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1460

"location": "A String", # The [regional endpoint]

1461

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that

1462

# contains this job.

1463

"startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).

1464

# Flexible resource scheduling jobs are started with some delay after job

1465

# creation, so start_time is unset before start and is updated when the

1466

# job is started by the Cloud Dataflow service. For other jobs, start_time

1467

# always equals create_time and is immutable and set by the Cloud Dataflow

1468

# service.

1469

"stepsLocation": "A String", # The GCS location where the steps are stored.

1470

"labels": { # User-defined labels for this job.

1471

#

1472

# The labels map can contain no more than 64 entries. Entries of the labels

1473

# map are UTF8 strings that comply with the following restrictions:

1474

#

1475

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

1476

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

1477

# * Both keys and values are additionally constrained to be <= 128 bytes in

1478

# size.

1479

"a_key": "A String",

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1480

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1481

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1482

# Cloud Dataflow service.

1483

"requestedState": "A String", # The job's requested state.

1484

#

1485

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1486

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1487

# also be used to directly set a job's requested state to

1488

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1489

# job if it has not already reached a terminal state.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1490

},

Sai Cheemalapati