Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 1 | <html><body> |
| 2 | <style> |
| 3 | |
| 4 | body, h1, h2, h3, div, span, p, pre, a { |
| 5 | margin: 0; |
| 6 | padding: 0; |
| 7 | border: 0; |
| 8 | font-weight: inherit; |
| 9 | font-style: inherit; |
| 10 | font-size: 100%; |
| 11 | font-family: inherit; |
| 12 | vertical-align: baseline; |
| 13 | } |
| 14 | |
| 15 | body { |
| 16 | font-size: 13px; |
| 17 | padding: 1em; |
| 18 | } |
| 19 | |
| 20 | h1 { |
| 21 | font-size: 26px; |
| 22 | margin-bottom: 1em; |
| 23 | } |
| 24 | |
| 25 | h2 { |
| 26 | font-size: 24px; |
| 27 | margin-bottom: 1em; |
| 28 | } |
| 29 | |
| 30 | h3 { |
| 31 | font-size: 20px; |
| 32 | margin-bottom: 1em; |
| 33 | margin-top: 1em; |
| 34 | } |
| 35 | |
| 36 | pre, code { |
| 37 | line-height: 1.5; |
| 38 | font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; |
| 39 | } |
| 40 | |
| 41 | pre { |
| 42 | margin-top: 0.5em; |
| 43 | } |
| 44 | |
| 45 | h1, h2, h3, p { |
| 46 | font-family: Arial, sans-serif; |
| 47 | } |
| 48 | |
| 49 | h1, h2, h3 { |
| 50 | border-bottom: solid #CCC 1px; |
| 51 | } |
| 52 | |
| 53 | .toc_element { |
| 54 | margin-top: 0.5em; |
| 55 | } |
| 56 | |
| 57 | .firstline { |
| 58 | margin-left: 2em; |
| 59 | } |
| 60 | |
| 61 | .method { |
| 62 | margin-top: 1em; |
| 63 | border: solid 1px #CCC; |
| 64 | padding: 1em; |
| 65 | background: #EEE; |
| 66 | } |
| 67 | |
| 68 | .details { |
| 69 | font-weight: bold; |
| 70 | font-size: 14px; |
| 71 | } |
| 72 | |
| 73 | </style> |
| 74 | |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 75 | <h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.templates.html">templates</a></h1> |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 76 | <h2>Instance Methods</h2> |
| 77 | <p class="toc_element"> |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 78 | <code><a href="#create">create(projectId, body=None, x__xgafv=None)</a></code></p> |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 79 | <p class="firstline">Creates a Cloud Dataflow job from a template.</p> |
| 80 | <p class="toc_element"> |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 81 | <code><a href="#get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</a></code></p> |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 82 | <p class="firstline">Get the template associated with a template.</p> |
| 83 | <p class="toc_element"> |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 84 | <code><a href="#launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, location=None, validateOnly=None, gcsPath=None, x__xgafv=None)</a></code></p> |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 85 | <p class="firstline">Launch a template.</p> |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 86 | <h3>Method Details</h3> |
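<p>As a quick orientation, the methods documented below are reached through a Dataflow API client object. The following is a minimal, hypothetical sketch using the <code>google-api-python-client</code> package; it assumes Application Default Credentials are already configured in the environment.</p>
<pre>
# Hypothetical client construction; install google-api-python-client first.
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')         # uses Application Default Credentials
templates = dataflow.projects().templates()  # exposes create(), get(), launch()
</pre>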
| 87 | <div class="method"> |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 88 | <code class="details" id="create">create(projectId, body=None, x__xgafv=None)</code> |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 89 | <pre>Creates a Cloud Dataflow job from a template. |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 90 | |
| 91 | Args: |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 92 | projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required) |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 93 | body: object, The request body. |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 94 | The object takes the form of: |
| 95 | |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 96 | { # A request to create a Cloud Dataflow job from a template. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 97 | "location": "A String", # The [regional endpoint] |
| 98 | # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to |
| 99 | # which to direct the request. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 100 | "environment": { # The environment values to set at runtime. # The runtime environment for the job. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 101 | "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory. |
| 102 | # Use with caution. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 103 | "tempLocation": "A String", # The Cloud Storage path to use for temporary files. |
| 104 | # Must be a valid Cloud Storage URL, beginning with `gs://`. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 105 | "network": "A String", # Network to which VMs will be assigned. If empty or unspecified, |
| 106 | # the service will use the network "default". |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 107 | "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of |
| 108 | # the form "regions/REGION/subnetworks/SUBNETWORK". |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 109 | "workerRegion": "A String", # The Compute Engine region |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 110 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 111 | # which worker processing should occur, e.g. "us-west1". Mutually exclusive |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 112 | # with worker_zone. If neither worker_region nor worker_zone is specified, |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 113 | # default to the control plane's region. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 114 | "numWorkers": 42, # The initial number of Google Compute Engine instances for the job. |
| 115 | "additionalExperiments": [ # Additional experiment flags for the job. |
| 116 | "A String", |
| 117 | ], |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 118 | "zone": "A String", # The Compute Engine [availability |
Sai Cheemalapati | 4ba8c23 | 2017-06-06 18:46:08 -0400 | [diff] [blame] | 119 | # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones) |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 120 | # for launching worker instances to run your pipeline. |
Dan O'Meara | dd49464 | 2020-05-01 07:42:23 -0700 | [diff] [blame] | 121 | # In the future, worker_zone will take precedence. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 122 | "serviceAccountEmail": "A String", # The email address of the service account to run the job as. |
| 123 | "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made |
| 124 | # available to your pipeline during execution, from 1 to 1000. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 125 | "workerZone": "A String", # The Compute Engine zone |
| 126 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 127 | # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive |
| 128 | # with worker_region. If neither worker_region nor worker_zone is specified, |
| 129 | # a zone in the control plane's region is chosen based on available capacity. |
| 130 | # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence. |
| 131 | "additionalUserLabels": { # Additional user labels to be specified for the job. |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 132 | # Keys and values should follow the restrictions specified in the [labeling |
| 133 | # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) |
| 134 | # page. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 135 | "a_key": "A String", |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 136 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 137 | "machineType": "A String", # The machine type to use for the job. Defaults to the value from the |
| 138 | # template if not specified. |
| 139 | "ipConfiguration": "A String", # Configuration for VM IPs. |
| 140 | "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job. |
| 141 | # Key format is: |
| 142 | # projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key> |
Jon Wayne Parrott | 692617a | 2017-01-06 09:58:29 -0800 | [diff] [blame] | 143 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 144 | "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to |
| 145 | # create the job. |
| 146 | # Must be a valid Cloud Storage URL, beginning with `gs://`. |
| 147 | "jobName": "A String", # Required. The job name to use for the created job. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 148 | "parameters": { # The runtime parameters to pass to the job. |
| 149 | "a_key": "A String", |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 150 | }, |
| 151 | } |
| 152 | |
| 153 | x__xgafv: string, V1 error format. |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 154 | Allowed values |
| 155 | 1 - v1 error format |
| 156 | 2 - v2 error format |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 157 | |
| 158 | Returns: |
| 159 | An object of the form: |
| 160 | |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 161 | { # Defines a job to be run by the Cloud Dataflow service. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 162 | "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed |
 | 163 | # form. This data is provided by the Dataflow service for ease of visualizing |
 | 164 | # the pipeline and interpreting Dataflow provided metrics. |
 | 165 | # Preliminary field: The format of this data may change at any time. |
 | 166 | # A description of the user pipeline and stages through which it is executed. |
 | 167 | # Created by Cloud Dataflow service. Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL. |
| 168 | "displayData": [ # Pipeline level display data. |
| 169 | { # Data provided with a pipeline or transform to provide descriptive info. |
| 170 | "url": "A String", # An optional full URL. |
| 171 | "javaClassValue": "A String", # Contains value if the data is of java class type. |
| 172 | "timestampValue": "A String", # Contains value if the data is of timestamp type. |
| 173 | "durationValue": "A String", # Contains value if the data is of duration type. |
| 174 | "label": "A String", # An optional label to display in a dax UI for the element. |
| 175 | "key": "A String", # The key identifying the display data. |
| 176 | # This is intended to be used as a label for the display data |
| 177 | # when viewed in a dax monitoring system. |
| 178 | "namespace": "A String", # The namespace for the key. This is usually a class name or programming |
| 179 | # language namespace (i.e. python module) which defines the display data. |
| 180 | # This allows a dax monitoring system to specially handle the data |
| 181 | # and perform custom rendering. |
| 182 | "floatValue": 3.14, # Contains value if the data is of float type. |
| 183 | "strValue": "A String", # Contains value if the data is of string type. |
| 184 | "int64Value": "A String", # Contains value if the data is of int64 type. |
| 185 | "boolValue": True or False, # Contains value if the data is of a boolean type. |
| 186 | "shortStrValue": "A String", # A possible additional shorter value to display. |
| 187 | # For example a java_class_name_value of com.mypackage.MyDoFn |
| 188 | # will be stored with MyDoFn as the short_str_value and |
| 189 | # com.mypackage.MyDoFn as the java_class_name value. |
| 190 | # short_str_value can be displayed and java_class_name_value |
| 191 | # will be displayed as a tooltip. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 192 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 193 | ], |
| 194 | "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them. |
| 195 | { # Description of the type, names/ids, and input/outputs for a transform. |
| 196 | "outputCollectionName": [ # User names for all collection outputs to this transform. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 197 | "A String", |
| 198 | ], |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 199 | "displayData": [ # Transform-specific display data. |
| 200 | { # Data provided with a pipeline or transform to provide descriptive info. |
| 201 | "url": "A String", # An optional full URL. |
| 202 | "javaClassValue": "A String", # Contains value if the data is of java class type. |
| 203 | "timestampValue": "A String", # Contains value if the data is of timestamp type. |
| 204 | "durationValue": "A String", # Contains value if the data is of duration type. |
| 205 | "label": "A String", # An optional label to display in a dax UI for the element. |
| 206 | "key": "A String", # The key identifying the display data. |
| 207 | # This is intended to be used as a label for the display data |
| 208 | # when viewed in a dax monitoring system. |
| 209 | "namespace": "A String", # The namespace for the key. This is usually a class name or programming |
| 210 | # language namespace (i.e. python module) which defines the display data. |
| 211 | # This allows a dax monitoring system to specially handle the data |
| 212 | # and perform custom rendering. |
| 213 | "floatValue": 3.14, # Contains value if the data is of float type. |
| 214 | "strValue": "A String", # Contains value if the data is of string type. |
| 215 | "int64Value": "A String", # Contains value if the data is of int64 type. |
| 216 | "boolValue": True or False, # Contains value if the data is of a boolean type. |
| 217 | "shortStrValue": "A String", # A possible additional shorter value to display. |
| 218 | # For example a java_class_name_value of com.mypackage.MyDoFn |
| 219 | # will be stored with MyDoFn as the short_str_value and |
| 220 | # com.mypackage.MyDoFn as the java_class_name value. |
| 221 | # short_str_value can be displayed and java_class_name_value |
| 222 | # will be displayed as a tooltip. |
| 223 | }, |
| 224 | ], |
| 225 | "id": "A String", # SDK generated id of this transform instance. |
| 226 | "inputCollectionName": [ # User names for all collection inputs to this transform. |
| 227 | "A String", |
| 228 | ], |
| 229 | "name": "A String", # User provided name for this transform instance. |
| 230 | "kind": "A String", # Type of transform. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 231 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 232 | ], |
| 233 | "executionPipelineStage": [ # Description of each stage of execution of the pipeline. |
| 234 | { # Description of the composing transforms, names/ids, and input/outputs of a |
| 235 | # stage of execution. Some composing transforms and sources may have been |
| 236 | # generated by the Dataflow service during execution planning. |
| 237 | "componentSource": [ # Collections produced and consumed by component transforms of this stage. |
| 238 | { # Description of an interstitial value between transforms in an execution |
| 239 | # stage. |
| 240 | "userName": "A String", # Human-readable name for this transform; may be user or system generated. |
| 241 | "name": "A String", # Dataflow service generated name for this source. |
| 242 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 243 | # source is most closely associated. |
| 244 | }, |
| 245 | ], |
| 246 | "inputSource": [ # Input sources for this stage. |
| 247 | { # Description of an input or output of an execution stage. |
| 248 | "userName": "A String", # Human-readable name for this source; may be user or system generated. |
| 249 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 250 | # source is most closely associated. |
| 251 | "sizeBytes": "A String", # Size of the source, if measurable. |
| 252 | "name": "A String", # Dataflow service generated name for this source. |
| 253 | }, |
| 254 | ], |
| 255 | "name": "A String", # Dataflow service generated name for this stage. |
| 256 | "componentTransform": [ # Transforms that comprise this execution stage. |
| 257 | { # Description of a transform executed as part of an execution stage. |
| 258 | "name": "A String", # Dataflow service generated name for this source. |
| 259 | "userName": "A String", # Human-readable name for this transform; may be user or system generated. |
| 260 | "originalTransform": "A String", # User name for the original user transform with which this transform is |
| 261 | # most closely associated. |
| 262 | }, |
| 263 | ], |
| 264 | "id": "A String", # Dataflow service generated id for this stage. |
| 265 | "outputSource": [ # Output sources for this stage. |
| 266 | { # Description of an input or output of an execution stage. |
| 267 | "userName": "A String", # Human-readable name for this source; may be user or system generated. |
| 268 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 269 | # source is most closely associated. |
| 270 | "sizeBytes": "A String", # Size of the source, if measurable. |
| 271 | "name": "A String", # Dataflow service generated name for this source. |
| 272 | }, |
| 273 | ], |
| 274 | "kind": "A String", # Type of transform this stage is executing. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 275 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 276 | ], |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 277 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 278 | "labels": { # User-defined labels for this job. |
Sai Cheemalapati | 4ba8c23 | 2017-06-06 18:46:08 -0400 | [diff] [blame] | 279 | # |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 280 | # The labels map can contain no more than 64 entries. Entries of the labels |
| 281 | # map are UTF8 strings that comply with the following restrictions: |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 282 | # |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 283 | # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62} |
| 284 | # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63} |
| 285 | # * Both keys and values are additionally constrained to be <= 128 bytes in |
| 286 | # size. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 287 | "a_key": "A String", |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 288 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 289 | "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 290 | "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 291 | "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 292 | "workerRegion": "A String", # The Compute Engine region |
| 293 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 294 | # which worker processing should occur, e.g. "us-west1". Mutually exclusive |
| 295 | # with worker_zone. If neither worker_region nor worker_zone is specified, |
| 296 | # default to the control plane's region. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 297 | "userAgent": { # A description of the process that generated the request. |
| 298 | "a_key": "", # Properties of the object. |
| 299 | }, |
| 300 | "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account. |
| 301 | "version": { # A structure describing which components and their versions of the service |
| 302 | # are required in order to run the job. |
| 303 | "a_key": "", # Properties of the object. |
| 304 | }, |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 305 | "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data |
| 306 | # at rest, AKA a Customer Managed Encryption Key (CMEK). |
| 307 | # |
| 308 | # Format: |
| 309 | # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 310 | "experiments": [ # The list of experiments to enable. |
| 311 | "A String", |
| 312 | ], |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 313 | "workerZone": "A String", # The Compute Engine zone |
| 314 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 315 | # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive |
| 316 | # with worker_region. If neither worker_region nor worker_zone is specified, |
| 317 | # a zone in the control plane's region is chosen based on available capacity. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 318 | "workerPools": [ # The worker pools. At least one "harness" worker pool must be |
| 319 | # specified in order for the job to have workers. |
| 320 | { # Describes one particular pool of Cloud Dataflow workers to be |
| 321 | # instantiated by the Cloud Dataflow service in order to perform the |
| 322 | # computations required by a job. Note that a workflow job may use |
| 323 | # multiple pools, in order to match the various computational |
| 324 | # requirements of the various stages of the job. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 325 | "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google |
| 326 | # Compute Engine API. |
| 327 | "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will |
| 328 | # only be set in the Fn API path. For non-cross-language pipelines this |
| 329 | # should have only one entry. Cross-language pipelines will have two or more |
| 330 | # entries. |
| 331 | { # Defines an SDK harness container for executing Dataflow pipelines. |
| 332 | "containerImage": "A String", # A docker container image that resides in Google Container Registry. |
| 333 | "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK |
| 334 | # container instance with this image. If false (or unset) recommends using |
| 335 | # more than one core per SDK container instance with this image for |
| 336 | # efficiency. Note that Dataflow service may choose to override this property |
| 337 | # if needed. |
| 338 | }, |
| 339 | ], |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 340 | "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service |
| 341 | # will attempt to choose a reasonable default. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 342 | "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle` |
| 343 | # are supported. |
| 344 | "metadata": { # Metadata to set on the Google Compute Engine VMs. |
| 345 | "a_key": "A String", |
| 346 | }, |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 347 | "diskSourceImage": "A String", # Fully qualified source image for disks. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 348 | "dataDisks": [ # Data disks that are used by a VM in this workflow. |
| 349 | { # Describes the data disk used by a workflow job. |
| 350 | "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will |
| 351 | # attempt to choose a reasonable default. |
| 352 | "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This |
| 353 | # must be a disk type appropriate to the project and zone in which |
| 354 | # the workers will run. If unknown or unspecified, the service |
| 355 | # will attempt to choose a reasonable default. |
| 356 | # |
| 357 | # For example, the standard persistent disk type is a resource name |
| 358 | # typically ending in "pd-standard". If SSD persistent disks are |
| 359 | # available, the resource name typically ends with "pd-ssd". The |
| 360 | # actual valid values are defined by the Google Compute Engine API, |
| 361 | # not by the Cloud Dataflow API; consult the Google Compute Engine |
| 362 | # documentation for more information about determining the set of |
| 363 | # available disk types for a particular project and zone. |
| 364 | # |
| 365 | # Google Compute Engine Disk types are local to a particular |
| 366 | # project in a particular zone, and so the resource name will |
| 367 | # typically look something like this: |
| 368 | # |
| 369 | # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard |
| 370 | "mountPoint": "A String", # Directory in a VM where disk is mounted. |
| 371 | }, |
| 372 | ], |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 373 | "packages": [ # Packages to be installed on workers. |
| 374 | { # The packages that must be installed in order for a worker to run the |
| 375 | # steps of the Cloud Dataflow job that will be assigned to its worker |
| 376 | # pool. |
| 377 | # |
| 378 | # This is the mechanism by which the Cloud Dataflow SDK causes code to |
| 379 | # be loaded onto the workers. For example, the Cloud Dataflow Java SDK |
| 380 | # might use this to install jars containing the user's code and all of the |
| 381 | # various dependencies (libraries, data files, etc.) required in order |
| 382 | # for that code to run. |
| 383 | "name": "A String", # The name of the package. |
| 384 | "location": "A String", # The resource to read the package from. The supported resource type is: |
| 385 | # |
| 386 | # Google Cloud Storage: |
| 387 | # |
| 388 | # storage.googleapis.com/{bucket} |
| 389 | # bucket.storage.googleapis.com/ |
| 390 | }, |
| 391 | ], |
| 392 | "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool. |
| 393 | # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and |
| 394 | # `TEARDOWN_NEVER`. |
| 395 | # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether |
| 396 | # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down |
| 397 | # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn |
| 398 | # down. |
| 399 | # |
| 400 | # If the workers are not torn down by the service, they will |
| 401 | # continue to run and use Google Compute Engine VM resources in the |
| 402 | # user's project until they are explicitly terminated by the user. |
| 403 | # Because of this, Google recommends using the `TEARDOWN_ALWAYS` |
| 404 | # policy except for small, manually supervised test jobs. |
| 405 | # |
| 406 | # If unknown or unspecified, the service will attempt to choose a reasonable |
| 407 | # default. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 408 | "network": "A String", # Network to which VMs will be assigned. If empty or unspecified, |
| 409 | # the service will use the network "default". |
| 410 | "ipConfiguration": "A String", # Configuration for VM IPs. |
| 411 | "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will |
| 412 | # attempt to choose a reasonable default. |
| 413 | "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool. |
| 414 | "maxNumWorkers": 42, # The maximum number of workers to cap scaling at. |
| 415 | "algorithm": "A String", # The algorithm to use for autoscaling. |
| 416 | }, |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 417 | "poolArgs": { # Extra arguments for this worker pool. |
| 418 | "a_key": "", # Properties of the object. Contains field @type with type URL. |
| 419 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 420 | "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of |
| 421 | # the form "regions/REGION/subnetworks/SUBNETWORK". |
| 422 | "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to |
| 423 | # execute the job. If zero or unspecified, the service will |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 424 | # attempt to choose a reasonable default. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 425 | "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the |
| 426 | # service will choose a number of threads (according to the number of cores |
| 427 | # on the selected machine type for batch, or 1 by convention for streaming). |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 428 | "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker |
| 429 | # harness, residing in Google Container Registry. |
| 430 | # |
| 431 | # Deprecated for the Fn API path. Use sdk_harness_container_images instead. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 432 | "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when |
| 433 | # using the standard Dataflow task runner. Users should ignore |
| 434 | # this field. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 435 | "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3". |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 436 | "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to |
| 437 | # access the Cloud Dataflow API. |
| 438 | "A String", |
| 439 | ], |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 440 | "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs. |
| 441 | # |
| 442 | # When workers access Google Cloud APIs, they logically do so via |
| 443 | # relative URLs. If this field is specified, it supplies the base |
| 444 | # URL to use for resolving these relative URLs. The normative |
| 445 | # algorithm used is defined by RFC 1808, "Relative Uniform Resource |
| 446 | # Locators". |
| 447 | # |
| 448 | # If not specified, the default value is "http://www.googleapis.com/" |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 449 | "workflowFileName": "A String", # The file to store the workflow in. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 450 | "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial |
| 451 | # console. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 452 | "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories. |
| 453 | "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by |
| 454 | # taskrunner; e.g. "root". |
| 455 | "vmId": "A String", # The ID string of the VM. |
| 456 | "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 457 | "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 458 | "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example, |
| 459 | # "shuffle/v1beta1". |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 460 | "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary |
| 461 | # storage. |
| 462 | # |
| 463 | # The supported resource type is: |
| 464 | # |
| 465 | # Google Cloud Storage: |
| 466 | # |
| 467 | # storage.googleapis.com/{bucket}/{object} |
| 468 | # bucket.storage.googleapis.com/{object} |
| 469 | "reportingEnabled": True or False, # Whether to send work progress updates to the service. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 470 | "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example, |
| 471 | # "dataflow/v1b3/projects". |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 472 | "baseUrl": "A String", # The base URL for accessing Google Cloud APIs. |
| 473 | # |
| 474 | # When workers access Google Cloud APIs, they logically do so via |
| 475 | # relative URLs. If this field is specified, it supplies the base |
| 476 | # URL to use for resolving these relative URLs. The normative |
| 477 | # algorithm used is defined by RFC 1808, "Relative Uniform Resource |
| 478 | # Locators". |
| 479 | # |
| 480 | # If not specified, the default value is "http://www.googleapis.com/" |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 481 | "workerId": "A String", # The ID of the worker running this pipeline. |
| 482 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 483 | "harnessCommand": "A String", # The command to launch the worker harness. |
| 484 | "logDir": "A String", # The directory on the VM to store logs. |
| 485 | "streamingWorkerMainClass": "A String", # The streaming worker main class name. |
| 486 | "languageHint": "A String", # The suggested backend language. |
| 487 | "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by |
| 488 | # taskrunner; e.g. "wheel". |
| 489 | "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs |
| 490 | # will not be uploaded. |
| 491 | # |
| 492 | # The supported resource type is: |
| 493 | # |
| 494 | # Google Cloud Storage: |
| 495 | # storage.googleapis.com/{bucket}/{object} |
| 496 | # bucket.storage.googleapis.com/{object} |
| 497 | "commandlinesFileName": "A String", # The file to store preprocessing commands in. |
| 498 | "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit. |
| 499 | "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for |
| 500 | # temporary storage. |
| 501 | # |
| 502 | # The supported resource type is: |
| 503 | # |
| 504 | # Google Cloud Storage: |
| 505 | # storage.googleapis.com/{bucket}/{object} |
| 506 | # bucket.storage.googleapis.com/{object} |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 507 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 508 | "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will |
| 509 | # attempt to choose a reasonable default. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 510 | "defaultPackageSet": "A String", # The default package set to install. This allows the service to |
| 511 | # select a default set of packages which are useful to worker |
| 512 | # harnesses written in a particular language. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 513 | "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the |
| 514 | # service will attempt to choose a reasonable default. |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 515 | }, |
| 516 | ], |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 517 | "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary |
| 518 | # storage. The system will append the suffix "/temp-{JOBNAME}" to |
| 519 | # this resource prefix, where {JOBNAME} is the value of the |
| 520 | # job_name field. The resulting bucket and object prefix is used |
| 521 | # as the prefix of the resources used to store temporary data |
| 522 | # needed during the job execution. NOTE: This will override the |
| 523 | # value in taskrunner_settings. |
| 524 | # The supported resource type is: |
| 525 | # |
| 526 | # Google Cloud Storage: |
| 527 | # |
| 528 | # storage.googleapis.com/{bucket}/{object} |
| 529 | # bucket.storage.googleapis.com/{object} |
| 530 | "internalExperiments": { # Experimental settings. |
| 531 | "a_key": "", # Properties of the object. Contains field @type with type URL. |
| 532 | }, |
| 533 | "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These |
| 534 | # options are passed through the service and are used to recreate the |
| 535 | # SDK pipeline options on the worker in a language agnostic and platform |
| 536 | # independent way. |
| 537 | "a_key": "", # Properties of the object. |
| 538 | }, |
Bu Sun Kim | 4ed7d3f | 2020-05-27 12:20:54 -0700 | [diff] [blame] | 539 | "dataset": "A String", # The dataset for the current project where various workflow |
| 540 | # related tables are stored. |
| 541 | # |
| 542 | # The supported resource type is: |
| 543 | # |
| 544 | # Google BigQuery: |
| 545 | # bigquery.googleapis.com/{dataset} |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 546 | "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or |
| 547 | # unspecified, the service will attempt to choose a reasonable |
| 548 | # default. This should be in the form of the API service name, |
| 549 | # e.g. "compute.googleapis.com". |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 550 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 551 | "stepsLocation": "A String", # The GCS location where the steps are stored. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 552 | "steps": [ # Exactly one of step or steps_location should be specified. |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 553 | # |
| 554 | # The top-level steps that constitute the entire job. |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 555 | { # Defines a particular step within a Cloud Dataflow job. |
| 556 | # |
| 557 | # A job consists of multiple steps, each of which performs some |
| 558 | # specific operation as part of the overall job. Data is typically |
| 559 | # passed from one step to another as part of the job. |
| 560 | # |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 561 | # Here's an example of a sequence of steps which together implement a |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 562 | # Map-Reduce job: |
| 563 | # |
| 564 | # * Read a collection of data from some source, parsing the |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 565 | # collection's elements. |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 566 | # |
| 567 | # * Validate the elements. |
| 568 | # |
| 569 | # * Apply a user-defined function to map each element to some value |
| 570 | # and extract an element-specific key value. |
| 571 | # |
| 572 | # * Group elements with the same key into a single element with |
| 573 | # that key, transforming a multiply-keyed collection into a |
| 574 | # uniquely-keyed collection. |
| 575 | # |
| 576 | # * Write the elements out to some data sink. |
| 577 | # |
| 578 | # Note that the Cloud Dataflow service may be used to run many different |
| 579 | # types of jobs, not just Map-Reduce. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 580 | "kind": "A String", # The kind of step in the Cloud Dataflow job. |
| 581 | "properties": { # Named properties associated with the step. Each kind of |
Sai Cheemalapati | c30d2b5 | 2017-03-13 12:12:03 -0400 | [diff] [blame] | 582 | # predefined step has its own required set of properties. |
| 583 | # Must be provided on Create. Only retrieved with JOB_VIEW_ALL. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 584 | "a_key": "", # Properties of the object. |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 585 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 586 | "name": "A String", # The name that identifies the step. This must be unique for each |
| 587 | # step with respect to all other steps in the Cloud Dataflow job. |
| 588 | }, |
| 589 | ], |
| 590 | "stageStates": [ # This field may be mutated by the Cloud Dataflow service; |
| 591 | # callers cannot mutate it. |
| 592 | { # A message describing the state of a particular execution stage. |
| 593 | "executionStageState": "A String", # Execution stage states allow the same set of values as JobState. |
| 594 | "executionStageName": "A String", # The name of the execution stage. |
| 595 | "currentStateTime": "A String", # The time at which the stage transitioned to this state. |
Jon Wayne Parrott | 7d5badb | 2016-08-16 12:44:29 -0700 | [diff] [blame] | 596 | }, |
| 597 | ], |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 598 | "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in |
| 599 | # `JOB_STATE_UPDATED`), this field contains the ID of that job. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 600 | "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the |
 | 601 | # ListJob response and Job SUMMARY view. |
 | 602 | # This field is populated by the Dataflow service to support filtering jobs |
 | 603 | # by the metadata values provided here. Populated for ListJobs and all GetJob views SUMMARY and higher. |
| 604 | "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job. |
| 605 | "sdkSupportStatus": "A String", # The support status for this SDK version. |
| 606 | "versionDisplayName": "A String", # A readable string describing the version of the SDK. |
| 607 | "version": "A String", # The version of the SDK used to run the job. |
| 608 | }, |
| 609 | "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job. |
| 610 | { # Metadata for a BigTable connector used by the job. |
| 611 | "instanceId": "A String", # InstanceId accessed in the connection. |
| 612 | "tableId": "A String", # TableId accessed in the connection. |
| 613 | "projectId": "A String", # ProjectId accessed in the connection. |
| 614 | }, |
| 615 | ], |
| 616 | "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job. |
| 617 | { # Metadata for a PubSub connector used by the job. |
| 618 | "subscription": "A String", # Subscription used in the connection. |
| 619 | "topic": "A String", # Topic accessed in the connection. |
| 620 | }, |
| 621 | ], |
| 622 | "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job. |
| 623 | { # Metadata for a BigQuery connector used by the job. |
| 624 | "dataset": "A String", # Dataset accessed in the connection. |
| 625 | "projectId": "A String", # Project accessed in the connection. |
| 626 | "query": "A String", # Query used to access data in the connection. |
| 627 | "table": "A String", # Table accessed in the connection. |
| 628 | }, |
| 629 | ], |
| 630 | "fileDetails": [ # Identification of a File source used in the Dataflow job. |
| 631 | { # Metadata for a File connector used by the job. |
| 632 | "filePattern": "A String", # File Pattern used to access files by the connector. |
| 633 | }, |
| 634 | ], |
| 635 | "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job. |
| 636 | { # Metadata for a Datastore connector used by the job. |
| 637 | "namespace": "A String", # Namespace used in the connection. |
| 638 | "projectId": "A String", # ProjectId accessed in the connection. |
| 639 | }, |
| 640 | ], |
| 641 | "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job. |
| 642 | { # Metadata for a Spanner connector used by the job. |
| 643 | "instanceId": "A String", # InstanceId accessed in the connection. |
| 644 | "databaseId": "A String", # DatabaseId accessed in the connection. |
| 645 | "projectId": "A String", # ProjectId accessed in the connection. |
| 646 | }, |
| 647 | ], |
| 648 | }, |
| 649 | "location": "A String", # The [regional endpoint] |
| 650 | # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that |
| 651 | # contains this job. |
| 652 | "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the |
| 653 | # corresponding name prefixes of the new job. |
| 654 | "a_key": "A String", |
| 655 | }, |
| 656 | "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING). |
| 657 | # Flexible resource scheduling jobs are started with some delay after job |
| 658 | # creation, so start_time is unset before start and is updated when the |
| 659 | # job is started by the Cloud Dataflow service. For other jobs, start_time |
| 660 | # always equals create_time and is immutable and set by the Cloud Dataflow |
| 661 | # service. |
| 662 | "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts. |
| 663 | # If this field is set, the service will ensure its uniqueness. |
| 664 | # The request to create a job will fail if the service has knowledge of a |
| 665 | # previously submitted job with the same client's ID and job name. |
| 666 | # The caller may use this field to ensure idempotence of job |
| 667 | # creation across retried attempts to create a job. |
| 668 | # By default, the field is empty and, in that case, the service ignores it. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 669 | "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that |
 | 670 | # isn't contained in the submitted job. # Deprecated. |
| 671 | "stages": { # A mapping from each stage to the information about that stage. |
| 672 | "a_key": { # Contains information about how a particular |
| 673 | # google.dataflow.v1beta3.Step will be executed. |
| 674 | "stepName": [ # The steps associated with the execution stage. |
| 675 | # Note that stages may have several steps, and that a given step |
| 676 | # might be run by more than one stage. |
| 677 | "A String", |
| 678 | ], |
| 679 | }, |
| 680 | }, |
| 681 | }, |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 682 | "type": "A String", # The type of Cloud Dataflow job. |
| 683 | "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the |
| 684 | # Cloud Dataflow service. |
| 685 | "tempFiles": [ # A set of files the system should be aware of that are used |
| 686 | # for temporary storage. These temporary files will be |
| 687 | # removed on job completion. |
| 688 | # No duplicates are allowed. |
| 689 | # No file patterns are supported. |
| 690 | # |
| 691 | # The supported files are: |
| 692 | # |
| 693 | # Google Cloud Storage: |
| 694 | # |
| 695 | # storage.googleapis.com/{bucket}/{object} |
| 696 | # bucket.storage.googleapis.com/{object} |
| 697 | "A String", |
| 698 | ], |
| 699 | "id": "A String", # The unique ID of this job. |
| 700 | # |
| 701 | # This field is set by the Cloud Dataflow service when the Job is |
| 702 | # created, and is immutable for the life of the job. |
| 703 | "requestedState": "A String", # The job's requested state. |
| 704 | # |
| 705 | # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and |
| 706 | # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may |
| 707 | # also be used to directly set a job's requested state to |
| 708 | # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the |
| 709 | # job if it has not already reached a terminal state. |
| 710 | "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID |
| 711 | # of the job it replaced. |
| 712 | # |
| 713 | # When sending a `CreateJobRequest`, you can update a job by specifying it |
| 714 | # here. The job named here is stopped, and its intermediate state is |
| 715 | # transferred to this job. |
| 716 | "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given |
| 717 | # snapshot. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 718 | "currentState": "A String", # The current state of the job. |
Bu Sun Kim | 715bd7f | 2019-06-14 16:50:42 -0700 | [diff] [blame] | 719 | # |
| 720 | # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise |
| 721 | # specified. |
| 722 | # |
| 723 | # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a |
| 724 | # terminal state. After a job has reached a terminal state, no |
| 725 | # further state updates may be made. |
| 726 | # |
| 727 | # This field may be mutated by the Cloud Dataflow service; |
| 728 | # callers cannot mutate it. |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 729 | "name": "A String", # The user-specified Cloud Dataflow job name. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 730 | # |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 731 | # Only one Job with a given name may exist in a project at any |
| 732 | # given time. If a caller attempts to create a Job with the same |
| 733 | # name as an already-existing Job, the attempt returns the |
| 734 | # existing Job. |
Bu Sun Kim | 6502091 | 2020-05-20 12:08:20 -0700 | [diff] [blame] | 735 | # |
Bu Sun Kim | d059ad8 | 2020-07-22 17:02:09 -0700 | [diff] [blame] | 736 | # The name must match the regular expression |
| 737 | # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?` |
| 738 | "currentStateTime": "A String", # The timestamp associated with the current state. |
| 739 | }</pre> |
| 740 | </div> |
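<p>The following is a minimal, hedged sketch of calling <code>create</code> from Python. The project ID, bucket names, and template path are placeholders chosen for illustration rather than values taken from this reference, and the client setup assumes Application Default Credentials.</p>
<pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')  # assumes Application Default Credentials

# Request body mirroring the request object documented above.
body = {
    'jobName': 'example-template-job',                         # placeholder
    'gcsPath': 'gs://your-bucket/templates/your-template',     # placeholder template path
    'parameters': {                                            # template-specific parameters
        'inputFile': 'gs://your-bucket/input.txt',
        'output': 'gs://your-bucket/output',
    },
    'environment': {
        'tempLocation': 'gs://your-bucket/temp',               # placeholder temp location
        'zone': 'us-central1-f',
    },
}

job = dataflow.projects().templates().create(
    projectId='your-project-id',                               # placeholder project ID
    body=body,
).execute()

# The response is the Job object documented above.
print(job.get('id'), job.get('currentState'))
</pre>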
| 741 | |
| 742 | <div class="method"> |
| 743 | <code class="details" id="get">get(projectId, view=None, gcsPath=None, location=None, x__xgafv=None)</code> |
| 744 | <pre>Get the template associated with a template. |
| 745 | |
| 746 | Args: |
| 747 | projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required) |
| 748 | view: string, The view to retrieve. Defaults to METADATA_ONLY. |
| 749 | gcsPath: string, Required. A Cloud Storage path to the template from which to |
| 750 | create the job. |
| 751 | Must be a valid Cloud Storage URL, beginning with 'gs://'. |
| 752 | location: string, The [regional endpoint] |
| 753 | (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to |
| 754 | which to direct the request. |
| 755 | x__xgafv: string, V1 error format. |
| 756 | Allowed values |
| 757 | 1 - v1 error format |
| 758 | 2 - v2 error format |
| 759 | |
| 760 | Returns: |
| 761 | An object of the form: |
| 762 | |
| 763 | { # The response to a GetTemplate request. |
| 764 | "runtimeMetadata": { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters. |
| 765 | "parameters": [ # The parameters for the template. |
| 766 | { # Metadata for a specific parameter. |
| 767 | "label": "A String", # Required. The label to display for the parameter. |
| 768 | "helpText": "A String", # Required. The help text to display for the parameter. |
| 769 | "regexes": [ # Optional. Regexes that the parameter must match. |
| 770 | "A String", |
| 771 | ], |
| 772 | "paramType": "A String", # Optional. The type of the parameter. |
| 773 | # Used for selecting input picker. |
| 774 | "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false. |
| 775 | "name": "A String", # Required. The name of the parameter. |
| 776 | }, |
| 777 | ], |
| 778 | "sdkInfo": { # SDK Information. # SDK Info for the template. |
| 779 | "language": "A String", # Required. The SDK Language. |
| 780 | "version": "A String", # Optional. The SDK version. |
| 781 | }, |
| 782 | }, |
| 783 | "status": { # The `Status` type defines a logical error model that is suitable for |
| 784 | # different programming environments, including REST APIs and RPC APIs. It is |
| 785 | # used by [gRPC](https://github.com/grpc). Each `Status` message contains |
| 786 | # three pieces of data: error code, error message, and error details. |
| 787 | # |
| 788 | # You can find out more about this error model and how to work with it in the |
| 789 | # [API Design Guide](https://cloud.google.com/apis/design/errors). |
| 790 | # The status of the get template request. Any problems with the request will be indicated in the error_details. |
| 791 | "details": [ # A list of messages that carry the error details. There is a common set of |
| 792 | # message types for APIs to use. |
| 793 | { |
| 794 | "a_key": "", # Properties of the object. Contains field @type with type URL. |
| 795 | }, |
| 796 | ], |
| 797 | "code": 42, # The status code, which should be an enum value of google.rpc.Code. |
| 798 | "message": "A String", # A developer-facing error message, which should be in English. Any |
| 799 | # user-facing error message should be localized and sent in the |
| 800 | # google.rpc.Status.details field, or localized by the client. |
| 801 | }, |
| 802 | "metadata": { # Metadata describing a template. # The template metadata describing the template name, available |
| 803 | # parameters, etc. |
| 804 | "description": "A String", # Optional. A description of the template. |
| 805 | "parameters": [ # The parameters for the template. |
| 806 | { # Metadata for a specific parameter. |
| 807 | "label": "A String", # Required. The label to display for the parameter. |
| 808 | "helpText": "A String", # Required. The help text to display for the parameter. |
| 809 | "regexes": [ # Optional. Regexes that the parameter must match. |
| 810 | "A String", |
| 811 | ], |
| 812 | "paramType": "A String", # Optional. The type of the parameter. |
| 813 | # Used for selecting input picker. |
| 814 | "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false. |
| 815 | "name": "A String", # Required. The name of the parameter. |
| 816 | }, |
| 817 | ], |
| 818 | "name": "A String", # Required. The name of the template. |
| 819 | }, |
| 820 | "templateType": "A String", # Template Type. |
| 821 | }</pre> |
| 822 | </div> |
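<p>A short, hypothetical sketch of calling <code>get</code> to inspect a template's metadata; the project ID and template path are placeholders, and the client setup is the same sketch used above.</p>
<pre>
from googleapiclient.discovery import build

dataflow = build('dataflow', 'v1b3')  # assumes Application Default Credentials

template = dataflow.projects().templates().get(
    projectId='your-project-id',                           # placeholder project ID
    gcsPath='gs://your-bucket/templates/your-template',    # placeholder template path
    view='METADATA_ONLY',                                  # the default view, shown explicitly
).execute()

# Walk the template parameters documented in the response above.
for param in template.get('metadata', {}).get('parameters', []):
    print(param.get('name'), '-', param.get('helpText'))
</pre>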
| 823 | |
| 824 | <div class="method"> |
| 825 | <code class="details" id="launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, dynamicTemplate_stagingLocation=None, location=None, validateOnly=None, gcsPath=None, x__xgafv=None)</code> |
| 826 | <pre>Launch a template. |
| 827 | |
| 828 | Args: |
| 829 | projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required) |
| 830 | body: object, The request body. |
| 831 | The object takes the form of: |
| 832 | |
| 833 | { # Parameters to provide to the template being launched. |
| 834 | "environment": { # The environment values to set at runtime. # The runtime environment for the job. |
| 835 | "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory. |
| 836 | # Use with caution. |
| 837 | "tempLocation": "A String", # The Cloud Storage path to use for temporary files. |
| 838 | # Must be a valid Cloud Storage URL, beginning with `gs://`. |
| 839 | "network": "A String", # Network to which VMs will be assigned. If empty or unspecified, |
| 840 | # the service will use the network "default". |
| 841 | "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of |
| 842 | # the form "regions/REGION/subnetworks/SUBNETWORK". |
| 843 | "workerRegion": "A String", # The Compute Engine region |
| 844 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 845 | # which worker processing should occur, e.g. "us-west1". Mutually exclusive |
| 846 | # with worker_zone. If neither worker_region nor worker_zone is specified, |
# the default is the control plane's region.
| 848 | "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job. |
| 849 | "additionalExperiments": [ # Additional experiment flags for the job. |
| 850 | "A String", |
| 851 | ], |
| 852 | "zone": "A String", # The Compute Engine [availability |
| 853 | # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones) |
| 854 | # for launching worker instances to run your pipeline. |
| 855 | # In the future, worker_zone will take precedence. |
| 856 | "serviceAccountEmail": "A String", # The email address of the service account to run the job as. |
| 857 | "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made |
| 858 | # available to your pipeline during execution, from 1 to 1000. |
| 859 | "workerZone": "A String", # The Compute Engine zone |
| 860 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 861 | # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive |
| 862 | # with worker_region. If neither worker_region nor worker_zone is specified, |
| 863 | # a zone in the control plane's region is chosen based on available capacity. |
| 864 | # If both `worker_zone` and `zone` are set, `worker_zone` takes precedence. |
| 865 | "additionalUserLabels": { # Additional user labels to be specified for the job. |
| 866 | # Keys and values should follow the restrictions specified in the [labeling |
| 867 | # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) |
| 868 | # page. |
"a_key": "A String",
},
"machineType": "A String", # The machine type to use for the job. Defaults to the value from the
| 872 | # template if not specified. |
| 873 | "ipConfiguration": "A String", # Configuration for VM IPs. |
| 874 | "kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job. |
| 875 | # Key format is: |
| 876 | # projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key> |
},
"transformNameMapping": { # Only applicable when updating a pipeline. Map of transform name prefixes of
| 879 | # the job to be replaced to the corresponding name prefixes of the new job. |
| 880 | "a_key": "A String", |
| 881 | }, |
| 882 | "update": True or False, # If set, replace the existing pipeline with the name specified by jobName |
| 883 | # with this pipeline, preserving state. |
| 884 | "jobName": "A String", # Required. The job name to use for the created job. |
| 885 | "parameters": { # The runtime parameters to pass to the job. |
| 886 | "a_key": "A String", |
| 887 | }, |
| 888 | } |
| 889 | |
dynamicTemplate_gcsPath: string, Path to the dynamic template spec file on Cloud Storage.
The file must be a JSON-serialized DynamicTemplateFileSpec object.
| 892 | dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies. |
| 893 | Must be a valid Cloud Storage URL, beginning with `gs://`. |
| 894 | location: string, The [regional endpoint] |
| 895 | (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to |
| 896 | which to direct the request. |
| 897 | validateOnly: boolean, If true, the request is validated but not actually executed. |
| 898 | Defaults to false. |
gcsPath: string, A Cloud Storage path to the template from which to create
the job.
Must be a valid Cloud Storage URL, beginning with `gs://`.
| 902 | x__xgafv: string, V1 error format. |
| 903 | Allowed values |
| 904 | 1 - v1 error format |
| 905 | 2 - v2 error format |
| 906 | |
| 907 | Returns: |
| 908 | An object of the form: |
| 909 | |
| 910 | { # Response to the request to launch a template. |
| 911 | "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and |
| 912 | # the job was successfully launched. |
| 913 | "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time. |
| 914 | # A description of the user pipeline and stages through which it is executed. |
| 915 | # Created by Cloud Dataflow service. Only retrieved with |
| 916 | # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL. |
| 917 | # form. This data is provided by the Dataflow service for ease of visualizing |
| 918 | # the pipeline and interpreting Dataflow provided metrics. |
| 919 | "displayData": [ # Pipeline level display data. |
| 920 | { # Data provided with a pipeline or transform to provide descriptive info. |
| 921 | "url": "A String", # An optional full URL. |
| 922 | "javaClassValue": "A String", # Contains value if the data is of java class type. |
| 923 | "timestampValue": "A String", # Contains value if the data is of timestamp type. |
| 924 | "durationValue": "A String", # Contains value if the data is of duration type. |
| 925 | "label": "A String", # An optional label to display in a dax UI for the element. |
| 926 | "key": "A String", # The key identifying the display data. |
| 927 | # This is intended to be used as a label for the display data |
| 928 | # when viewed in a dax monitoring system. |
| 929 | "namespace": "A String", # The namespace for the key. This is usually a class name or programming |
| 930 | # language namespace (i.e. python module) which defines the display data. |
| 931 | # This allows a dax monitoring system to specially handle the data |
| 932 | # and perform custom rendering. |
| 933 | "floatValue": 3.14, # Contains value if the data is of float type. |
| 934 | "strValue": "A String", # Contains value if the data is of string type. |
| 935 | "int64Value": "A String", # Contains value if the data is of int64 type. |
| 936 | "boolValue": True or False, # Contains value if the data is of a boolean type. |
| 937 | "shortStrValue": "A String", # A possible additional shorter value to display. |
| 938 | # For example a java_class_name_value of com.mypackage.MyDoFn |
| 939 | # will be stored with MyDoFn as the short_str_value and |
| 940 | # com.mypackage.MyDoFn as the java_class_name value. |
| 941 | # short_str_value can be displayed and java_class_name_value |
| 942 | # will be displayed as a tooltip. |
| 943 | }, |
| 944 | ], |
| 945 | "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them. |
| 946 | { # Description of the type, names/ids, and input/outputs for a transform. |
| 947 | "outputCollectionName": [ # User names for all collection outputs to this transform. |
| 948 | "A String", |
| 949 | ], |
| 950 | "displayData": [ # Transform-specific display data. |
| 951 | { # Data provided with a pipeline or transform to provide descriptive info. |
| 952 | "url": "A String", # An optional full URL. |
| 953 | "javaClassValue": "A String", # Contains value if the data is of java class type. |
| 954 | "timestampValue": "A String", # Contains value if the data is of timestamp type. |
| 955 | "durationValue": "A String", # Contains value if the data is of duration type. |
| 956 | "label": "A String", # An optional label to display in a dax UI for the element. |
| 957 | "key": "A String", # The key identifying the display data. |
| 958 | # This is intended to be used as a label for the display data |
| 959 | # when viewed in a dax monitoring system. |
| 960 | "namespace": "A String", # The namespace for the key. This is usually a class name or programming |
| 961 | # language namespace (i.e. python module) which defines the display data. |
| 962 | # This allows a dax monitoring system to specially handle the data |
| 963 | # and perform custom rendering. |
| 964 | "floatValue": 3.14, # Contains value if the data is of float type. |
| 965 | "strValue": "A String", # Contains value if the data is of string type. |
| 966 | "int64Value": "A String", # Contains value if the data is of int64 type. |
| 967 | "boolValue": True or False, # Contains value if the data is of a boolean type. |
| 968 | "shortStrValue": "A String", # A possible additional shorter value to display. |
| 969 | # For example a java_class_name_value of com.mypackage.MyDoFn |
| 970 | # will be stored with MyDoFn as the short_str_value and |
| 971 | # com.mypackage.MyDoFn as the java_class_name value. |
| 972 | # short_str_value can be displayed and java_class_name_value |
| 973 | # will be displayed as a tooltip. |
| 974 | }, |
| 975 | ], |
| 976 | "id": "A String", # SDK generated id of this transform instance. |
| 977 | "inputCollectionName": [ # User names for all collection inputs to this transform. |
| 978 | "A String", |
| 979 | ], |
| 980 | "name": "A String", # User provided name for this transform instance. |
| 981 | "kind": "A String", # Type of transform. |
| 982 | }, |
| 983 | ], |
| 984 | "executionPipelineStage": [ # Description of each stage of execution of the pipeline. |
| 985 | { # Description of the composing transforms, names/ids, and input/outputs of a |
| 986 | # stage of execution. Some composing transforms and sources may have been |
| 987 | # generated by the Dataflow service during execution planning. |
| 988 | "componentSource": [ # Collections produced and consumed by component transforms of this stage. |
| 989 | { # Description of an interstitial value between transforms in an execution |
| 990 | # stage. |
| 991 | "userName": "A String", # Human-readable name for this transform; may be user or system generated. |
| 992 | "name": "A String", # Dataflow service generated name for this source. |
| 993 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 994 | # source is most closely associated. |
| 995 | }, |
| 996 | ], |
| 997 | "inputSource": [ # Input sources for this stage. |
| 998 | { # Description of an input or output of an execution stage. |
| 999 | "userName": "A String", # Human-readable name for this source; may be user or system generated. |
| 1000 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 1001 | # source is most closely associated. |
| 1002 | "sizeBytes": "A String", # Size of the source, if measurable. |
| 1003 | "name": "A String", # Dataflow service generated name for this source. |
| 1004 | }, |
| 1005 | ], |
| 1006 | "name": "A String", # Dataflow service generated name for this stage. |
| 1007 | "componentTransform": [ # Transforms that comprise this execution stage. |
| 1008 | { # Description of a transform executed as part of an execution stage. |
| 1009 | "name": "A String", # Dataflow service generated name for this source. |
| 1010 | "userName": "A String", # Human-readable name for this transform; may be user or system generated. |
| 1011 | "originalTransform": "A String", # User name for the original user transform with which this transform is |
| 1012 | # most closely associated. |
| 1013 | }, |
| 1014 | ], |
| 1015 | "id": "A String", # Dataflow service generated id for this stage. |
| 1016 | "outputSource": [ # Output sources for this stage. |
| 1017 | { # Description of an input or output of an execution stage. |
| 1018 | "userName": "A String", # Human-readable name for this source; may be user or system generated. |
| 1019 | "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this |
| 1020 | # source is most closely associated. |
| 1021 | "sizeBytes": "A String", # Size of the source, if measurable. |
| 1022 | "name": "A String", # Dataflow service generated name for this source. |
| 1023 | }, |
| 1024 | ], |
| 1025 | "kind": "A String", # Type of tranform this stage is executing. |
| 1026 | }, |
| 1027 | ], |
| 1028 | }, |
| 1029 | "labels": { # User-defined labels for this job. |
| 1030 | # |
| 1031 | # The labels map can contain no more than 64 entries. Entries of the labels |
| 1032 | # map are UTF8 strings that comply with the following restrictions: |
| 1033 | # |
| 1034 | # * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62} |
| 1035 | # * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63} |
| 1036 | # * Both keys and values are additionally constrained to be <= 128 bytes in |
| 1037 | # size. |
| 1038 | "a_key": "A String", |
| 1039 | }, |
| 1040 | "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to. |
| 1041 | "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job. |
| 1042 | "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in. |
| 1043 | "workerRegion": "A String", # The Compute Engine region |
| 1044 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 1045 | # which worker processing should occur, e.g. "us-west1". Mutually exclusive |
| 1046 | # with worker_zone. If neither worker_region nor worker_zone is specified, |
# the default is the control plane's region.
| 1048 | "userAgent": { # A description of the process that generated the request. |
| 1049 | "a_key": "", # Properties of the object. |
| 1050 | }, |
| 1051 | "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account. |
| 1052 | "version": { # A structure describing which components and their versions of the service |
| 1053 | # are required in order to run the job. |
| 1054 | "a_key": "", # Properties of the object. |
| 1055 | }, |
| 1056 | "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data |
| 1057 | # at rest, AKA a Customer Managed Encryption Key (CMEK). |
| 1058 | # |
| 1059 | # Format: |
| 1060 | # projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY |
| 1061 | "experiments": [ # The list of experiments to enable. |
| 1062 | "A String", |
| 1063 | ], |
| 1064 | "workerZone": "A String", # The Compute Engine zone |
| 1065 | # (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in |
| 1066 | # which worker processing should occur, e.g. "us-west1-a". Mutually exclusive |
| 1067 | # with worker_region. If neither worker_region nor worker_zone is specified, |
| 1068 | # a zone in the control plane's region is chosen based on available capacity. |
| 1069 | "workerPools": [ # The worker pools. At least one "harness" worker pool must be |
| 1070 | # specified in order for the job to have workers. |
| 1071 | { # Describes one particular pool of Cloud Dataflow workers to be |
| 1072 | # instantiated by the Cloud Dataflow service in order to perform the |
| 1073 | # computations required by a job. Note that a workflow job may use |
| 1074 | # multiple pools, in order to match the various computational |
| 1075 | # requirements of the various stages of the job. |
| 1076 | "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google |
| 1077 | # Compute Engine API. |
| 1078 | "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will |
| 1079 | # only be set in the Fn API path. For non-cross-language pipelines this |
| 1080 | # should have only one entry. Cross-language pipelines will have two or more |
| 1081 | # entries. |
{ # Defines an SDK harness container for executing Dataflow pipelines.
"containerImage": "A String", # A docker container image that resides in Google Container Registry.
"useSingleCorePerContainer": True or False, # If true, recommends that the Dataflow service use only one core per SDK
# container instance with this image. If false (or unset), recommends using
# more than one core per SDK container instance with this image for
# efficiency. Note that the Dataflow service may choose to override this
# property if needed.
| 1089 | }, |
| 1090 | ], |
| 1091 | "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service |
| 1092 | # will attempt to choose a reasonable default. |
| 1093 | "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle` |
| 1094 | # are supported. |
| 1095 | "metadata": { # Metadata to set on the Google Compute Engine VMs. |
| 1096 | "a_key": "A String", |
| 1097 | }, |
| 1098 | "diskSourceImage": "A String", # Fully qualified source image for disks. |
| 1099 | "dataDisks": [ # Data disks that are used by a VM in this workflow. |
| 1100 | { # Describes the data disk used by a workflow job. |
| 1101 | "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will |
| 1102 | # attempt to choose a reasonable default. |
| 1103 | "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This |
| 1104 | # must be a disk type appropriate to the project and zone in which |
| 1105 | # the workers will run. If unknown or unspecified, the service |
| 1106 | # will attempt to choose a reasonable default. |
| 1107 | # |
| 1108 | # For example, the standard persistent disk type is a resource name |
| 1109 | # typically ending in "pd-standard". If SSD persistent disks are |
| 1110 | # available, the resource name typically ends with "pd-ssd". The |
# actual valid values are defined by the Google Compute Engine API,
| 1112 | # not by the Cloud Dataflow API; consult the Google Compute Engine |
| 1113 | # documentation for more information about determining the set of |
| 1114 | # available disk types for a particular project and zone. |
| 1115 | # |
| 1116 | # Google Compute Engine Disk types are local to a particular |
| 1117 | # project in a particular zone, and so the resource name will |
| 1118 | # typically look something like this: |
| 1119 | # |
| 1120 | # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard |
| 1121 | "mountPoint": "A String", # Directory in a VM where disk is mounted. |
| 1122 | }, |
| 1123 | ], |
| 1124 | "packages": [ # Packages to be installed on workers. |
| 1125 | { # The packages that must be installed in order for a worker to run the |
| 1126 | # steps of the Cloud Dataflow job that will be assigned to its worker |
| 1127 | # pool. |
| 1128 | # |
| 1129 | # This is the mechanism by which the Cloud Dataflow SDK causes code to |
| 1130 | # be loaded onto the workers. For example, the Cloud Dataflow Java SDK |
| 1131 | # might use this to install jars containing the user's code and all of the |
| 1132 | # various dependencies (libraries, data files, etc.) required in order |
| 1133 | # for that code to run. |
| 1134 | "name": "A String", # The name of the package. |
| 1135 | "location": "A String", # The resource to read the package from. The supported resource type is: |
| 1136 | # |
| 1137 | # Google Cloud Storage: |
| 1138 | # |
| 1139 | # storage.googleapis.com/{bucket} |
| 1140 | # bucket.storage.googleapis.com/ |
| 1141 | }, |
| 1142 | ], |
| 1143 | "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool. |
| 1144 | # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and |
| 1145 | # `TEARDOWN_NEVER`. |
| 1146 | # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether |
| 1147 | # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down |
| 1148 | # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn |
| 1149 | # down. |
| 1150 | # |
| 1151 | # If the workers are not torn down by the service, they will |
| 1152 | # continue to run and use Google Compute Engine VM resources in the |
| 1153 | # user's project until they are explicitly terminated by the user. |
| 1154 | # Because of this, Google recommends using the `TEARDOWN_ALWAYS` |
| 1155 | # policy except for small, manually supervised test jobs. |
| 1156 | # |
| 1157 | # If unknown or unspecified, the service will attempt to choose a reasonable |
| 1158 | # default. |
| 1159 | "network": "A String", # Network to which VMs will be assigned. If empty or unspecified, |
| 1160 | # the service will use the network "default". |
| 1161 | "ipConfiguration": "A String", # Configuration for VM IPs. |
| 1162 | "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will |
| 1163 | # attempt to choose a reasonable default. |
| 1164 | "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool. |
| 1165 | "maxNumWorkers": 42, # The maximum number of workers to cap scaling at. |
| 1166 | "algorithm": "A String", # The algorithm to use for autoscaling. |
| 1167 | }, |
| 1168 | "poolArgs": { # Extra arguments for this worker pool. |
| 1169 | "a_key": "", # Properties of the object. Contains field @type with type URL. |
| 1170 | }, |
| 1171 | "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of |
| 1172 | # the form "regions/REGION/subnetworks/SUBNETWORK". |
| 1173 | "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to |
| 1174 | # execute the job. If zero or unspecified, the service will |
| 1175 | # attempt to choose a reasonable default. |
| 1176 | "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the |
| 1177 | # service will choose a number of threads (according to the number of cores |
| 1178 | # on the selected machine type for batch, or 1 by convention for streaming). |
| 1179 | "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker |
| 1180 | # harness, residing in Google Container Registry. |
| 1181 | # |
| 1182 | # Deprecated for the Fn API path. Use sdk_harness_container_images instead. |
| 1183 | "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when |
| 1184 | # using the standard Dataflow task runner. Users should ignore |
| 1185 | # this field. |
| 1186 | "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3" |
| 1187 | "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to |
| 1188 | # access the Cloud Dataflow API. |
| 1189 | "A String", |
| 1190 | ], |
| 1191 | "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs. |
| 1192 | # |
| 1193 | # When workers access Google Cloud APIs, they logically do so via |
| 1194 | # relative URLs. If this field is specified, it supplies the base |
| 1195 | # URL to use for resolving these relative URLs. The normative |
| 1196 | # algorithm used is defined by RFC 1808, "Relative Uniform Resource |
| 1197 | # Locators". |
| 1198 | # |
| 1199 | # If not specified, the default value is "http://www.googleapis.com/" |
| 1200 | "workflowFileName": "A String", # The file to store the workflow in. |
| 1201 | "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial |
| 1202 | # console. |
| 1203 | "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories. |
| 1204 | "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by |
| 1205 | # taskrunner; e.g. "root". |
| 1206 | "vmId": "A String", # The ID string of the VM. |
| 1207 | "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr. |
| 1208 | "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness. |
| 1209 | "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example, |
| 1210 | # "shuffle/v1beta1". |
| 1211 | "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary |
| 1212 | # storage. |
| 1213 | # |
| 1214 | # The supported resource type is: |
| 1215 | # |
| 1216 | # Google Cloud Storage: |
| 1217 | # |
| 1218 | # storage.googleapis.com/{bucket}/{object} |
| 1219 | # bucket.storage.googleapis.com/{object} |
| 1220 | "reportingEnabled": True or False, # Whether to send work progress updates to the service. |
| 1221 | "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example, |
| 1222 | # "dataflow/v1b3/projects". |
| 1223 | "baseUrl": "A String", # The base URL for accessing Google Cloud APIs. |
| 1224 | # |
| 1225 | # When workers access Google Cloud APIs, they logically do so via |
| 1226 | # relative URLs. If this field is specified, it supplies the base |
| 1227 | # URL to use for resolving these relative URLs. The normative |
| 1228 | # algorithm used is defined by RFC 1808, "Relative Uniform Resource |
| 1229 | # Locators". |
| 1230 | # |
| 1231 | # If not specified, the default value is "http://www.googleapis.com/" |
| 1232 | "workerId": "A String", # The ID of the worker running this pipeline. |
| 1233 | }, |
| 1234 | "harnessCommand": "A String", # The command to launch the worker harness. |
| 1235 | "logDir": "A String", # The directory on the VM to store logs. |
| 1236 | "streamingWorkerMainClass": "A String", # The streaming worker main class name. |
| 1237 | "languageHint": "A String", # The suggested backend language. |
| 1238 | "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by |
| 1239 | # taskrunner; e.g. "wheel". |
| 1240 | "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs |
| 1241 | # will not be uploaded. |
| 1242 | # |
| 1243 | # The supported resource type is: |
| 1244 | # |
| 1245 | # Google Cloud Storage: |
| 1246 | # storage.googleapis.com/{bucket}/{object} |
| 1247 | # bucket.storage.googleapis.com/{object} |
| 1248 | "commandlinesFileName": "A String", # The file to store preprocessing commands in. |
| 1249 | "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit. |
| 1250 | "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for |
| 1251 | # temporary storage. |
| 1252 | # |
| 1253 | # The supported resource type is: |
| 1254 | # |
| 1255 | # Google Cloud Storage: |
| 1256 | # storage.googleapis.com/{bucket}/{object} |
| 1257 | # bucket.storage.googleapis.com/{object} |
| 1258 | }, |
| 1259 | "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will |
| 1260 | # attempt to choose a reasonable default. |
| 1261 | "defaultPackageSet": "A String", # The default package set to install. This allows the service to |
| 1262 | # select a default set of packages which are useful to worker |
| 1263 | # harnesses written in a particular language. |
| 1264 | "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the |
| 1265 | # service will attempt to choose a reasonable default. |
| 1266 | }, |
| 1267 | ], |
| 1268 | "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary |
# storage. The system will append the suffix "/temp-{JOBNAME}" to
| 1270 | # this resource prefix, where {JOBNAME} is the value of the |
| 1271 | # job_name field. The resulting bucket and object prefix is used |
| 1272 | # as the prefix of the resources used to store temporary data |
| 1273 | # needed during the job execution. NOTE: This will override the |
| 1274 | # value in taskrunner_settings. |
| 1275 | # The supported resource type is: |
| 1276 | # |
| 1277 | # Google Cloud Storage: |
| 1278 | # |
| 1279 | # storage.googleapis.com/{bucket}/{object} |
| 1280 | # bucket.storage.googleapis.com/{object} |
| 1281 | "internalExperiments": { # Experimental settings. |
| 1282 | "a_key": "", # Properties of the object. Contains field @type with type URL. |
| 1283 | }, |
| 1284 | "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These |
| 1285 | # options are passed through the service and are used to recreate the |
| 1286 | # SDK pipeline options on the worker in a language agnostic and platform |
| 1287 | # independent way. |
| 1288 | "a_key": "", # Properties of the object. |
| 1289 | }, |
| 1290 | "dataset": "A String", # The dataset for the current project where various workflow |
| 1291 | # related tables are stored. |
| 1292 | # |
| 1293 | # The supported resource type is: |
| 1294 | # |
| 1295 | # Google BigQuery: |
| 1296 | # bigquery.googleapis.com/{dataset} |
| 1297 | "clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or |
| 1298 | # unspecified, the service will attempt to choose a reasonable |
| 1299 | # default. This should be in the form of the API service name, |
| 1300 | # e.g. "compute.googleapis.com". |
| 1301 | }, |
| 1302 | "stepsLocation": "A String", # The GCS location where the steps are stored. |
| 1303 | "steps": [ # Exactly one of step or steps_location should be specified. |
| 1304 | # |
| 1305 | # The top-level steps that constitute the entire job. |
| 1306 | { # Defines a particular step within a Cloud Dataflow job. |
| 1307 | # |
| 1308 | # A job consists of multiple steps, each of which performs some |
| 1309 | # specific operation as part of the overall job. Data is typically |
| 1310 | # passed from one step to another as part of the job. |
| 1311 | # |
| 1312 | # Here's an example of a sequence of steps which together implement a |
| 1313 | # Map-Reduce job: |
| 1314 | # |
| 1315 | # * Read a collection of data from some source, parsing the |
| 1316 | # collection's elements. |
| 1317 | # |
| 1318 | # * Validate the elements. |
| 1319 | # |
| 1320 | # * Apply a user-defined function to map each element to some value |
| 1321 | # and extract an element-specific key value. |
| 1322 | # |
| 1323 | # * Group elements with the same key into a single element with |
| 1324 | # that key, transforming a multiply-keyed collection into a |
| 1325 | # uniquely-keyed collection. |
| 1326 | # |
| 1327 | # * Write the elements out to some data sink. |
| 1328 | # |
| 1329 | # Note that the Cloud Dataflow service may be used to run many different |
| 1330 | # types of jobs, not just Map-Reduce. |
| 1331 | "kind": "A String", # The kind of step in the Cloud Dataflow job. |
| 1332 | "properties": { # Named properties associated with the step. Each kind of |
| 1333 | # predefined step has its own required set of properties. |
| 1334 | # Must be provided on Create. Only retrieved with JOB_VIEW_ALL. |
| 1335 | "a_key": "", # Properties of the object. |
| 1336 | }, |
| 1337 | "name": "A String", # The name that identifies the step. This must be unique for each |
| 1338 | # step with respect to all other steps in the Cloud Dataflow job. |
| 1339 | }, |
| 1340 | ], |
| 1341 | "stageStates": [ # This field may be mutated by the Cloud Dataflow service; |
| 1342 | # callers cannot mutate it. |
| 1343 | { # A message describing the state of a particular execution stage. |
| 1344 | "executionStageState": "A String", # Executions stage states allow the same set of values as JobState. |
| 1345 | "executionStageName": "A String", # The name of the execution stage. |
| 1346 | "currentStateTime": "A String", # The time at which the stage transitioned to this state. |
| 1347 | }, |
| 1348 | ], |
| 1349 | "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in |
| 1350 | # `JOB_STATE_UPDATED`), this field contains the ID of that job. |
| 1351 | "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs |
| 1352 | # by the metadata values provided here. Populated for ListJobs and all GetJob |
| 1353 | # views SUMMARY and higher. |
| 1354 | # ListJob response and Job SUMMARY view. |
| 1355 | "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job. |
| 1356 | "sdkSupportStatus": "A String", # The support status for this SDK version. |
| 1357 | "versionDisplayName": "A String", # A readable string describing the version of the SDK. |
| 1358 | "version": "A String", # The version of the SDK used to run the job. |
| 1359 | }, |
| 1360 | "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job. |
| 1361 | { # Metadata for a BigTable connector used by the job. |
| 1362 | "instanceId": "A String", # InstanceId accessed in the connection. |
| 1363 | "tableId": "A String", # TableId accessed in the connection. |
| 1364 | "projectId": "A String", # ProjectId accessed in the connection. |
| 1365 | }, |
| 1366 | ], |
| 1367 | "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job. |
| 1368 | { # Metadata for a PubSub connector used by the job. |
| 1369 | "subscription": "A String", # Subscription used in the connection. |
| 1370 | "topic": "A String", # Topic accessed in the connection. |
| 1371 | }, |
| 1372 | ], |
| 1373 | "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job. |
| 1374 | { # Metadata for a BigQuery connector used by the job. |
| 1375 | "dataset": "A String", # Dataset accessed in the connection. |
| 1376 | "projectId": "A String", # Project accessed in the connection. |
| 1377 | "query": "A String", # Query used to access data in the connection. |
| 1378 | "table": "A String", # Table accessed in the connection. |
| 1379 | }, |
| 1380 | ], |
| 1381 | "fileDetails": [ # Identification of a File source used in the Dataflow job. |
| 1382 | { # Metadata for a File connector used by the job. |
| 1383 | "filePattern": "A String", # File Pattern used to access files by the connector. |
| 1384 | }, |
| 1385 | ], |
| 1386 | "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job. |
| 1387 | { # Metadata for a Datastore connector used by the job. |
| 1388 | "namespace": "A String", # Namespace used in the connection. |
| 1389 | "projectId": "A String", # ProjectId accessed in the connection. |
| 1390 | }, |
| 1391 | ], |
| 1392 | "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job. |
| 1393 | { # Metadata for a Spanner connector used by the job. |
| 1394 | "instanceId": "A String", # InstanceId accessed in the connection. |
| 1395 | "databaseId": "A String", # DatabaseId accessed in the connection. |
| 1396 | "projectId": "A String", # ProjectId accessed in the connection. |
| 1397 | }, |
| 1398 | ], |
| 1399 | }, |
| 1400 | "location": "A String", # The [regional endpoint] |
| 1401 | # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that |
| 1402 | # contains this job. |
| 1403 | "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the |
| 1404 | # corresponding name prefixes of the new job. |
| 1405 | "a_key": "A String", |
| 1406 | }, |
| 1407 | "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING). |
| 1408 | # Flexible resource scheduling jobs are started with some delay after job |
| 1409 | # creation, so start_time is unset before start and is updated when the |
| 1410 | # job is started by the Cloud Dataflow service. For other jobs, start_time |
# always equals create_time and is immutable and set by the Cloud Dataflow
| 1412 | # service. |
| 1413 | "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts. |
| 1414 | # If this field is set, the service will ensure its uniqueness. |
| 1415 | # The request to create a job will fail if the service has knowledge of a |
# previously submitted job with the same client ID and job name.
| 1417 | # The caller may use this field to ensure idempotence of job |
| 1418 | # creation across retried attempts to create a job. |
| 1419 | # By default, the field is empty and, in that case, the service ignores it. |
| 1420 | "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated. |
| 1421 | # isn't contained in the submitted job. |
| 1422 | "stages": { # A mapping from each stage to the information about that stage. |
| 1423 | "a_key": { # Contains information about how a particular |
| 1424 | # google.dataflow.v1beta3.Step will be executed. |
| 1425 | "stepName": [ # The steps associated with the execution stage. |
| 1426 | # Note that stages may have several steps, and that a given step |
| 1427 | # might be run by more than one stage. |
| 1428 | "A String", |
| 1429 | ], |
| 1430 | }, |
| 1431 | }, |
| 1432 | }, |
| 1433 | "type": "A String", # The type of Cloud Dataflow job. |
| 1434 | "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the |
| 1435 | # Cloud Dataflow service. |
| 1436 | "tempFiles": [ # A set of files the system should be aware of that are used |
| 1437 | # for temporary storage. These temporary files will be |
| 1438 | # removed on job completion. |
| 1439 | # No duplicates are allowed. |
| 1440 | # No file patterns are supported. |
| 1441 | # |
| 1442 | # The supported files are: |
| 1443 | # |
| 1444 | # Google Cloud Storage: |
| 1445 | # |
| 1446 | # storage.googleapis.com/{bucket}/{object} |
| 1447 | # bucket.storage.googleapis.com/{object} |
| 1448 | "A String", |
| 1449 | ], |
| 1450 | "id": "A String", # The unique ID of this job. |
| 1451 | # |
| 1452 | # This field is set by the Cloud Dataflow service when the Job is |
| 1453 | # created, and is immutable for the life of the job. |
| 1454 | "requestedState": "A String", # The job's requested state. |
| 1455 | # |
| 1456 | # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and |
| 1457 | # `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may |
| 1458 | # also be used to directly set a job's requested state to |
| 1459 | # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the |
| 1460 | # job if it has not already reached a terminal state. |
| 1461 | "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID |
| 1462 | # of the job it replaced. |
| 1463 | # |
| 1464 | # When sending a `CreateJobRequest`, you can update a job by specifying it |
| 1465 | # here. The job named here is stopped, and its intermediate state is |
| 1466 | # transferred to this job. |
| 1467 | "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given |
| 1468 | # snapshot. |
| 1469 | "currentState": "A String", # The current state of the job. |
| 1470 | # |
| 1471 | # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise |
| 1472 | # specified. |
| 1473 | # |
| 1474 | # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a |
| 1475 | # terminal state. After a job has reached a terminal state, no |
| 1476 | # further state updates may be made. |
| 1477 | # |
| 1478 | # This field may be mutated by the Cloud Dataflow service; |
| 1479 | # callers cannot mutate it. |
| 1480 | "name": "A String", # The user-specified Cloud Dataflow job name. |
| 1481 | # |
| 1482 | # Only one Job with a given name may exist in a project at any |
| 1483 | # given time. If a caller attempts to create a Job with the same |
| 1484 | # name as an already-existing Job, the attempt returns the |
| 1485 | # existing Job. |
| 1486 | # |
| 1487 | # The name must match the regular expression |
| 1488 | # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?` |
| 1489 | "currentStateTime": "A String", # The timestamp associated with the current state. |
| 1490 | }, |
}</pre>
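<p>A minimal usage sketch for this method, assuming the google-api-python-client with Application Default Credentials; the project ID, region, bucket, job name, and template parameter keys below are illustrative placeholders (the parameter keys depend on the template that was staged), not values defined by this reference.</p>
<pre>
from googleapiclient.discovery import build

# Placeholders: substitute your own project, region, and staged template.
PROJECT_ID = 'my-project'
REGION = 'us-central1'
TEMPLATE_GCS_PATH = 'gs://my-bucket/templates/wordcount'

# LaunchTemplateParameters: jobName is required; parameters and environment
# are optional and shown here with assumed example values.
body = {
    'jobName': 'wordcount-from-template',
    'parameters': {
        'inputFile': 'gs://my-bucket/input/*.txt',
        'output': 'gs://my-bucket/output/results',
    },
    'environment': {
        'tempLocation': 'gs://my-bucket/temp',
        'maxWorkers': 5,
    },
}

dataflow = build('dataflow', 'v1b3')

response = dataflow.projects().templates().launch(
    projectId=PROJECT_ID,
    location=REGION,
    gcsPath=TEMPLATE_GCS_PATH,
    validateOnly=False,  # set True to validate the request without running it
    body=body,
).execute()

# Unless validateOnly was set, the response carries the created Job.
job = response.get('job', {})
print('Launched job %s (%s) in state %s' % (
    job.get('id'), job.get('name'), job.get('currentState')))
</pre>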
| 1492 | </div> |
| 1493 | |
</body></html>