Blame - docs/dyn/dataflow_v1b3.projects.templates.html - platform/external/python/google-api-python-client

2016-08-16 12:44:29 -0700

[diff] [blame]

76

<h2>Instance Methods</h2>

77

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

78

<code><a href="#create">create(projectId, body=None, x__xgafv=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

79

<p class="firstline">Creates a Cloud Dataflow job from a template.</p>

80

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

81

<code><a href="#get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

82

<p class="firstline">Get the template associated with a template.</p>

83

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

84

<code><a href="#launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</a></code></p>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

85

<p class="firstline">Launch a template.</p>

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

86

<h3>Method Details</h3>

87

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

88

<code class="details" id="create">create(projectId, body=None, x__xgafv=None)</code>

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

89

<pre>Creates a Cloud Dataflow job from a template.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

90

91

Args:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

92

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

93

body: object, The request body.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

94

The object takes the form of:

95

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

96

{ # A request to create a Cloud Dataflow job from a template.

97

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

98

"workerRegion": "A String", # The Compute Engine region

99

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

100

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

101

# with worker_zone. If neither worker_region nor worker_zone is specified,

102

# default to the control plane's region.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

103

"machineType": "A String", # The machine type to use for the job. Defaults to the value from the

104

# template if not specified.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

105

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

106

# the service will use the network "default".

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

107

"zone": "A String", # The Compute Engine [availability

108

# zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

109

# for launching worker instances to run your pipeline.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

110

# In the future, worker_zone will take precedence.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

111

"additionalUserLabels": { # Additional user labels to be specified for the job.

112

# Keys and values should follow the restrictions specified in the [labeling

113

# restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)

114

# page.

115

"a_key": "A String",

116

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

117

"numWorkers": 42, # The initial number of Google Compute Engine instances for the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

118

"additionalExperiments": [ # Additional experiment flags for the job.

119

"A String",

120

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

121

"ipConfiguration": "A String", # Configuration for VM IPs.

Thomas Coffee

2f24537

2017-03-27 10:39:26 -0700

[diff] [blame]

122

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

123

# Use with caution.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

124

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

125

# Must be a valid Cloud Storage URL, beginning with `gs://`.

126

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

127

"kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.

128

# Key format is:

129

# projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

130

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

131

# available to your pipeline during execution, from 1 to 1000.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

132

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

133

# the form "regions/REGION/subnetworks/SUBNETWORK".

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

134

"workerZone": "A String", # The Compute Engine zone

135

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

136

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

137

# with worker_region. If neither worker_region nor worker_zone is specified,

138

# a zone in the control plane's region is chosen based on available capacity.

139

# If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.

Jon Wayne Parrott

692617a

2017-01-06 09:58:29 -0800

[diff] [blame]

140

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

141

"gcsPath": "A String", # Required. A Cloud Storage path to the template from which to

142

# create the job.

143

# Must be a valid Cloud Storage URL, beginning with `gs://`.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

144

"location": "A String", # The [regional endpoint]

145

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

146

# which to direct the request.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

147

"parameters": { # The runtime parameters to pass to the job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

148

"a_key": "A String",

149

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

150

"jobName": "A String", # Required. The job name to use for the created job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

151

}

152

153

x__xgafv: string, V1 error format.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

154

Allowed values

155

1 - v1 error format

156

2 - v2 error format

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

157

158

Returns:

159

An object of the form:

160

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

161

{ # Defines a job to be run by the Cloud Dataflow service.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

162

"labels": { # User-defined labels for this job.

163

#

164

# The labels map can contain no more than 64 entries. Entries of the labels

165

# map are UTF8 strings that comply with the following restrictions:

166

#

167

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

168

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

169

# * Both keys and values are additionally constrained to be <= 128 bytes in

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

# size.

"a_key": "A String",

},

"jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs

174

# by the metadata values provided here. Populated for ListJobs and all GetJob

175

# views SUMMARY and higher.

176

# ListJob response and Job SUMMARY view.

177

"sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.

178

"versionDisplayName": "A String", # A readable string describing the version of the SDK.

179

"version": "A String", # The version of the SDK used to run the job.

180

"sdkSupportStatus": "A String", # The support status for this SDK version.

181

},

182

"pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.

183

{ # Metadata for a PubSub connector used by the job.

184

"topic": "A String", # Topic accessed in the connection.

185

"subscription": "A String", # Subscription used in the connection.

186

},

187

],

188

"datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.

189

{ # Metadata for a Datastore connector used by the job.

190

"projectId": "A String", # ProjectId accessed in the connection.

191

"namespace": "A String", # Namespace used in the connection.

192

},

193

],

194

"fileDetails": [ # Identification of a File source used in the Dataflow job.

195

{ # Metadata for a File connector used by the job.

196

"filePattern": "A String", # File Pattern used to access files by the connector.

197

},

198

],

199

"spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.

200

{ # Metadata for a Spanner connector used by the job.

201

"instanceId": "A String", # InstanceId accessed in the connection.

202

"projectId": "A String", # ProjectId accessed in the connection.

203

"databaseId": "A String", # DatabaseId accessed in the connection.

204

},

205

],

206

"bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.

207

{ # Metadata for a BigTable connector used by the job.

208

"instanceId": "A String", # InstanceId accessed in the connection.

209

"projectId": "A String", # ProjectId accessed in the connection.

210

"tableId": "A String", # TableId accessed in the connection.

211

},

212

],

213

"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.

214

{ # Metadata for a BigQuery connector used by the job.

215

"projectId": "A String", # Project accessed in the connection.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

216

"query": "A String", # Query used to access data in the connection.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

217

"table": "A String", # Table accessed in the connection.

218

"dataset": "A String", # Dataset accessed in the connection.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

223

# A description of the user pipeline and stages through which it is executed.

224

# Created by Cloud Dataflow service. Only retrieved with

225

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

226

# form. This data is provided by the Dataflow service for ease of visualizing

227

# the pipeline and interpreting Dataflow provided metrics.

228

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

229

{ # Description of the type, names/ids, and input/outputs for a transform.

230

"kind": "A String", # Type of transform.

231

"name": "A String", # User provided name for this transform instance.

232

"inputCollectionName": [ # User names for all collection inputs to this transform.

233

"A String",

234

],

235

"displayData": [ # Transform-specific display data.

236

{ # Data provided with a pipeline or transform to provide descriptive info.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

237

"key": "A String", # The key identifying the display data.

238

# This is intended to be used as a label for the display data

239

# when viewed in a dax monitoring system.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

240

"shortStrValue": "A String", # A possible additional shorter value to display.

241

# For example a java_class_name_value of com.mypackage.MyDoFn

242

# will be stored with MyDoFn as the short_str_value and

243

# com.mypackage.MyDoFn as the java_class_name value.

244

# short_str_value can be displayed and java_class_name_value

245

# will be displayed as a tooltip.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

246

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

247

"url": "A String", # An optional full URL.

248

"floatValue": 3.14, # Contains value if the data is of float type.

249

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

250

# language namespace (e.g. a Python module) which defines the display data.

251

# This allows a dax monitoring system to specially handle the data

252

# and perform custom rendering.

253

"javaClassValue": "A String", # Contains value if the data is of java class type.

254

"label": "A String", # An optional label to display in a dax UI for the element.

255

"boolValue": True or False, # Contains value if the data is of a boolean type.

256

"strValue": "A String", # Contains value if the data is of string type.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

257

"durationValue": "A String", # Contains value if the data is of duration type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

258

"int64Value": "A String", # Contains value if the data is of int64 type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

259

},

260

],

261

"outputCollectionName": [ # User names for all collection outputs to this transform.

262

"A String",

263

],

264

"id": "A String", # SDK generated id of this transform instance.

265

},

266

],

267

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

268

{ # Description of the composing transforms, names/ids, and input/outputs of a

269

# stage of execution. Some composing transforms and sources may have been

270

# generated by the Dataflow service during execution planning.

271

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

272

{ # Description of an interstitial value between transforms in an execution

273

# stage.

274

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

275

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

276

# source is most closely associated.

277

"name": "A String", # Dataflow service generated name for this source.

278

},

279

],

280

"kind": "A String", # Type of transform this stage is executing.

281

"name": "A String", # Dataflow service generated name for this stage.

282

"outputSource": [ # Output sources for this stage.

283

{ # Description of an input or output of an execution stage.

284

"userName": "A String", # Human-readable name for this source; may be user or system generated.

285

"sizeBytes": "A String", # Size of the source, if measurable.

286

"name": "A String", # Dataflow service generated name for this source.

287

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

288

# source is most closely associated.

289

},

290

],

291

"inputSource": [ # Input sources for this stage.

292

{ # Description of an input or output of an execution stage.

293

"userName": "A String", # Human-readable name for this source; may be user or system generated.

294

"sizeBytes": "A String", # Size of the source, if measurable.

295

"name": "A String", # Dataflow service generated name for this source.

296

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

297

# source is most closely associated.

298

},

299

],

300

"componentTransform": [ # Transforms that comprise this execution stage.

301

{ # Description of a transform executed as part of an execution stage.

302

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

303

"originalTransform": "A String", # User name for the original user transform with which this transform is

304

# most closely associated.

305

"name": "A String", # Dataflow service generated name for this source.

306

},

307

],

308

"id": "A String", # Dataflow service generated id for this stage.

309

},

310

],

311

"displayData": [ # Pipeline level display data.

312

{ # Data provided with a pipeline or transform to provide descriptive info.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

313

"key": "A String", # The key identifying the display data.

314

# This is intended to be used as a label for the display data

315

# when viewed in a dax monitoring system.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

316

"shortStrValue": "A String", # A possible additional shorter value to display.

317

# For example a java_class_name_value of com.mypackage.MyDoFn

318

# will be stored with MyDoFn as the short_str_value and

319

# com.mypackage.MyDoFn as the java_class_name value.

320

# short_str_value can be displayed and java_class_name_value

321

# will be displayed as a tooltip.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

322

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

323

"url": "A String", # An optional full URL.

324

"floatValue": 3.14, # Contains value if the data is of float type.

325

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

326

# language namespace (e.g. a Python module) which defines the display data.

327

# This allows a dax monitoring system to specially handle the data

328

# and perform custom rendering.

329

"javaClassValue": "A String", # Contains value if the data is of java class type.

330

"label": "A String", # An optional label to display in a dax UI for the element.

331

"boolValue": True or False, # Contains value if the data is of a boolean type.

332

"strValue": "A String", # Contains value if the data is of string type.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

333

"durationValue": "A String", # Contains value if the data is of duration type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

334

"int64Value": "A String", # Contains value if the data is of int64 type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"stageStates": [ # This field may be mutated by the Cloud Dataflow service;

339

# callers cannot mutate it.

340

{ # A message describing the state of a particular execution stage.

341

"executionStageName": "A String", # The name of the execution stage.

342

"executionStageState": "A String", # Execution stage states allow the same set of values as JobState.

343

"currentStateTime": "A String", # The time at which the stage transitioned to this state.

344

},

345

],

346

"id": "A String", # The unique ID of this job.

347

#

348

# This field is set by the Cloud Dataflow service when the Job is

349

# created, and is immutable for the life of the job.

350

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

351

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

352

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

353

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

354

# corresponding name prefixes of the new job.

355

"a_key": "A String",

356

},

357

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

358

"workerRegion": "A String", # The Compute Engine region

359

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

360

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

361

# with worker_zone. If neither worker_region nor worker_zone is specified,

362

# default to the control plane's region.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

363

"version": { # A structure describing which components and their versions of the service

364

# are required in order to run the job.

365

"a_key": "", # Properties of the object.

366

},

367

"flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.

368

"serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data

369

# at rest, AKA a Customer Managed Encryption Key (CMEK).

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

370

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

371

# Format:

372

# projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY

373

"internalExperiments": { # Experimental settings.

374

"a_key": "", # Properties of the object. Contains field @type with type URL.

375

},

376

"dataset": "A String", # The dataset for the current project where various workflow

377

# related tables are stored.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

378

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

379

# The supported resource type is:

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

380

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

381

# Google BigQuery:

382

# bigquery.googleapis.com/{dataset}

383

"experiments": [ # The list of experiments to enable.

384

"A String",

385

],

386

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

387

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

388

# options are passed through the service and are used to recreate the

389

# SDK pipeline options on the worker in a language agnostic and platform

390

# independent way.

391

"a_key": "", # Properties of the object.

392

},

393

"userAgent": { # A description of the process that generated the request.

394

"a_key": "", # Properties of the object.

395

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

396

"workerZone": "A String", # The Compute Engine zone

397

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

398

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

399

# with worker_region. If neither worker_region nor worker_zone is specified,

400

# a zone in the control plane's region is chosen based on available capacity.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

401

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

402

# specified in order for the job to have workers.

403

{ # Describes one particular pool of Cloud Dataflow workers to be

404

# instantiated by the Cloud Dataflow service in order to perform the

405

# computations required by a job. Note that a workflow job may use

406

# multiple pools, in order to match the various computational

407

# requirements of the various stages of the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

408

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

409

# harness, residing in Google Container Registry.

410

#

411

# Deprecated for the Fn API path. Use sdk_harness_container_images instead.

412

"ipConfiguration": "A String", # Configuration for VM IPs.

413

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

414

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

415

"algorithm": "A String", # The algorithm to use for autoscaling.

416

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

417

"diskSourceImage": "A String", # Fully qualified source image for disks.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

418

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

419

# the service will use the network "default".

420

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

421

# will attempt to choose a reasonable default.

422

"metadata": { # Metadata to set on the Google Compute Engine VMs.

423

"a_key": "A String",

424

},

425

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

426

# service will attempt to choose a reasonable default.

427

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

428

# Compute Engine API.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

429

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

430

# using the standard Dataflow task runner. Users should ignore

431

# this field.

432

"workflowFileName": "A String", # The file to store the workflow in.

433

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

434

# will not be uploaded.

435

#

436

# The supported resource type is:

437

#

438

# Google Cloud Storage:

439

# storage.googleapis.com/{bucket}/{object}

440

# bucket.storage.googleapis.com/{object}

441

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

442

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

443

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

444

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

445

"vmId": "A String", # The ID string of the VM.

446

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

447

# taskrunner; e.g. "wheel".

448

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

449

# taskrunner; e.g. "root".

450

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

451

# access the Cloud Dataflow API.

452

"A String",

453

],

454

"languageHint": "A String", # The suggested backend language.

455

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

456

# console.

457

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

458

"logDir": "A String", # The directory on the VM to store logs.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

459

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

460

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

461

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

462

# "shuffle/v1beta1".

463

"workerId": "A String", # The ID of the worker running this pipeline.

464

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

465

#

466

# When workers access Google Cloud APIs, they logically do so via

467

# relative URLs. If this field is specified, it supplies the base

468

# URL to use for resolving these relative URLs. The normative

469

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

470

# Locators".

471

#

472

# If not specified, the default value is "http://www.googleapis.com/"

473

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

474

# "dataflow/v1b3/projects".

475

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

476

# storage.

477

#

478

# The supported resource type is:

479

#

480

# Google Cloud Storage:

481

#

482

# storage.googleapis.com/{bucket}/{object}

483

# bucket.storage.googleapis.com/{object}

484

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

485

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

486

"harnessCommand": "A String", # The command to launch the worker harness.

487

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

488

# temporary storage.

489

#

490

# The supported resource type is:

491

#

492

# Google Cloud Storage:

493

# storage.googleapis.com/{bucket}/{object}

494

# bucket.storage.googleapis.com/{object}

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

495

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

496

#

497

# When workers access Google Cloud APIs, they logically do so via

498

# relative URLs. If this field is specified, it supplies the base

499

# URL to use for resolving these relative URLs. The normative

500

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

501

# Locators".

502

#

503

# If not specified, the default value is "http://www.googleapis.com/"

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

504

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

505

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

506

# service will choose a number of threads (according to the number of cores

507

# on the selected machine type for batch, or 1 by convention for streaming).

508

"poolArgs": { # Extra arguments for this worker pool.

509

"a_key": "", # Properties of the object. Contains field @type with type URL.

510

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

511

"packages": [ # Packages to be installed on workers.

512

{ # The packages that must be installed in order for a worker to run the

513

# steps of the Cloud Dataflow job that will be assigned to its worker

514

# pool.

515

#

516

# This is the mechanism by which the Cloud Dataflow SDK causes code to

517

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

518

# might use this to install jars containing the user's code and all of the

519

# various dependencies (libraries, data files, etc.) required in order

520

# for that code to run.

521

"location": "A String", # The resource to read the package from. The supported resource type is:

522

#

523

# Google Cloud Storage:

524

#

525

# storage.googleapis.com/{bucket}

526

# bucket.storage.googleapis.com/

527

"name": "A String", # The name of the package.

528

},

529

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

530

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

531

# select a default set of packages which are useful to worker

532

# harnesses written in a particular language.

533

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

534

# are supported.

535

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

536

# attempt to choose a reasonable default.

537

"teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.

538

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

539

# `TEARDOWN_NEVER`.

540

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

541

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

542

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

543

# down.

544

#

545

# If the workers are not torn down by the service, they will

546

# continue to run and use Google Compute Engine VM resources in the

547

# user's project until they are explicitly terminated by the user.

548

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

549

# policy except for small, manually supervised test jobs.

550

#

551

# If unknown or unspecified, the service will attempt to choose a reasonable

552

# default.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

553

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

554

# attempt to choose a reasonable default.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

555

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

556

# execute the job. If zero or unspecified, the service will

557

# attempt to choose a reasonable default.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

558

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

559

# the form "regions/REGION/subnetworks/SUBNETWORK".

560

"dataDisks": [ # Data disks that are used by a VM in this workflow.

561

{ # Describes the data disk used by a workflow job.

562

"mountPoint": "A String", # Directory in a VM where disk is mounted.

563

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

564

# attempt to choose a reasonable default.

565

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

566

# must be a disk type appropriate to the project and zone in which

567

# the workers will run. If unknown or unspecified, the service

568

# will attempt to choose a reasonable default.

569

#

570

# For example, the standard persistent disk type is a resource name

571

# typically ending in "pd-standard". If SSD persistent disks are

572

# available, the resource name typically ends with "pd-ssd". The

573

# actual valid values are defined by the Google Compute Engine API,

574

# not by the Cloud Dataflow API; consult the Google Compute Engine

575

# documentation for more information about determining the set of

576

# available disk types for a particular project and zone.

577

#

578

# Google Compute Engine Disk types are local to a particular

579

# project in a particular zone, and so the resource name will

580

# typically look something like this:

581

#

582

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

583

},

584

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

585

"sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will

586

# only be set in the Fn API path. For non-cross-language pipelines this

587

# should have only one entry. Cross-language pipelines will have two or more

588

# entries.

589

{ # Defines an SDK harness container for executing Dataflow pipelines.

590

"containerImage": "A String", # A docker container image that resides in Google Container Registry.

591

"useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK

592

# container instance with this image. If false (or unset) recommends using

593

# more than one core per SDK container instance with this image for

594

# efficiency. Note that Dataflow service may choose to override this property

595

# if needed.

596

},

597

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

598

},

599

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

600

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

601

# unspecified, the service will attempt to choose a reasonable

602

# default. This should be in the form of the API service name,

603

# e.g. "compute.googleapis.com".

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

604

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

605

# storage. The system will append the suffix "/temp-{JOBNAME}" to

606

# this resource prefix, where {JOBNAME} is the value of the

607

# job_name field. The resulting bucket and object prefix is used

608

# as the prefix of the resources used to store temporary data

609

# needed during the job execution. NOTE: This will override the

610

# value in taskrunner_settings.

611

# The supported resource type is:

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

612

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

613

# Google Cloud Storage:

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

614

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

615

# storage.googleapis.com/{bucket}/{object}

616

# bucket.storage.googleapis.com/{object}

617

},

618

"location": "A String", # The [regional endpoint]

619

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that

620

# contains this job.

621

"tempFiles": [ # A set of files the system should be aware of that are used

622

# for temporary storage. These temporary files will be

623

# removed on job completion.

624

# No duplicates are allowed.

625

# No file patterns are supported.

626

#

627

# The supported files are:

628

#

629

# Google Cloud Storage:

630

#

631

# storage.googleapis.com/{bucket}/{object}

632

# bucket.storage.googleapis.com/{object}

633

"A String",

634

],

635

"type": "A String", # The type of Cloud Dataflow job.

636

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

637

# If this field is set, the service will ensure its uniqueness.

638

# The request to create a job will fail if the service has knowledge of a

639

# previously submitted job with the same client's ID and job name.

640

# The caller may use this field to ensure idempotence of job

641

# creation across retried attempts to create a job.

642

# By default, the field is empty and, in that case, the service ignores it.

643

"createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given

644

# snapshot.

645

"stepsLocation": "A String", # The GCS location where the steps are stored.

646

"currentStateTime": "A String", # The timestamp associated with the current state.

647

"startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).

648

# Flexible resource scheduling jobs are started with some delay after job

649

# creation, so start_time is unset before start and is updated when the

650

# job is started by the Cloud Dataflow service. For other jobs, start_time

651

# is always equal to create_time and is immutable and set by the Cloud Dataflow

652

# service.

653

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

654

# Cloud Dataflow service.

655

"requestedState": "A String", # The job's requested state.

656

#

657

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

658

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

659

# also be used to directly set a job's requested state to

660

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

661

# job if it has not already reached a terminal state.

662

"name": "A String", # The user-specified Cloud Dataflow job name.

663

#

664

# Only one Job with a given name may exist in a project at any

665

# given time. If a caller attempts to create a Job with the same

666

# name as an already-existing Job, the attempt returns the

667

# existing Job.

668

#

669

# The name must match the regular expression

670

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

671

"steps": [ # Exactly one of steps or steps_location should be specified.

672

#

673

# The top-level steps that constitute the entire job.

674

{ # Defines a particular step within a Cloud Dataflow job.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

675

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

676

# A job consists of multiple steps, each of which performs some

677

# specific operation as part of the overall job. Data is typically

678

# passed from one step to another as part of the job.

679

#

680

# Here's an example of a sequence of steps which together implement a

681

# Map-Reduce job:

682

#

683

# * Read a collection of data from some source, parsing the

684

# collection's elements.

685

#

686

# * Validate the elements.

687

#

688

# * Apply a user-defined function to map each element to some value

689

# and extract an element-specific key value.

690

#

691

# * Group elements with the same key into a single element with

692

# that key, transforming a multiply-keyed collection into a

693

# uniquely-keyed collection.

694

#

695

# * Write the elements out to some data sink.

696

#

697

# Note that the Cloud Dataflow service may be used to run many different

698

# types of jobs, not just Map-Reduce.

699

"kind": "A String", # The kind of step in the Cloud Dataflow job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

700

"name": "A String", # The name that identifies the step. This must be unique for each

701

# step with respect to all other steps in the Cloud Dataflow job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

702

"properties": { # Named properties associated with the step. Each kind of

703

# predefined step has its own required set of properties.

704

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

705

"a_key": "", # Properties of the object.

706

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

707

},

708

],

709

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

710

# of the job it replaced.

711

#

712

# When sending a `CreateJobRequest`, you can update a job by specifying it

713

# here. The job named here is stopped, and its intermediate state is

714

# transferred to this job.

715

"currentState": "A String", # The current state of the job.

716

#

717

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

718

# specified.

719

#

720

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

721

# terminal state. After a job has reached a terminal state, no

722

# further state updates may be made.

723

#

724

# This field may be mutated by the Cloud Dataflow service;

725

# callers cannot mutate it.

726

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

727

# isn't contained in the submitted job.

728

"stages": { # A mapping from each stage to the information about that stage.

729

"a_key": { # Contains information about how a particular

730

# google.dataflow.v1beta3.Step will be executed.

731

"stepName": [ # The steps associated with the execution stage.

732

# Note that stages may have several steps, and that a given step

733

# might be run by more than one stage.

"A String",

],

},

},

},

}</pre>

</div>

<code class="details" id="get">get(projectId, gcsPath=None, location=None, x__xgafv=None, view=None)</code>

744

<pre>Get the template associated with a template.

745

746

Args:

747

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

748

gcsPath: string, Required. A Cloud Storage path to the template from which to

749

create the job.

750

Must be a valid Cloud Storage URL, beginning with 'gs://'.

751

location: string, The [regional endpoint]

752

(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

753

which to direct the request.

754

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

view: string, The view to retrieve. Defaults to METADATA_ONLY.

759

760

Returns:

761

An object of the form:

762

763

{ # The response to a GetTemplate request.

764

"status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the

765

# request will be indicated in the error_details.

766

# different programming environments, including REST APIs and RPC APIs. It is

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

767

# used by [gRPC](https://github.com/grpc). Each `Status` message contains

768

# three pieces of data: error code, error message, and error details.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

769

#

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

770

# You can find out more about this error model and how to work with it in the

771

# [API Design Guide](https://cloud.google.com/apis/design/errors).

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

772

"message": "A String", # A developer-facing error message, which should be in English. Any

773

# user-facing error message should be localized and sent in the

774

# google.rpc.Status.details field, or localized by the client.

775

"code": 42, # The status code, which should be an enum value of google.rpc.Code.

776

"details": [ # A list of messages that carry the error details. There is a common set of

777

# message types for APIs to use.

778

{

779

"a_key": "", # Properties of the object. Contains field @type with type URL.

780

},

781

],

782

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

783

"templateType": "A String", # Template Type.

784

"runtimeMetadata": { # RuntimeMetadata describing a runtime environment. # Describes the runtime metadata with SDKInfo and available parameters.

785

"parameters": [ # The parameters for the template.

786

{ # Metadata for a specific parameter.

787

"name": "A String", # Required. The name of the parameter.

788

"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.

789

"regexes": [ # Optional. Regexes that the parameter must match.

790

"A String",

791

],

792

"label": "A String", # Required. The label to display for the parameter.

793

"helpText": "A String", # Required. The help text to display for the parameter.

794

"paramType": "A String", # Optional. The type of the parameter.

795

# Used for selecting input picker.

796

},

797

],

798

"sdkInfo": { # SDK Information. # SDK Info for the template.

799

"version": "A String", # Optional. The SDK version.

800

"language": "A String", # Required. The SDK Language.

801

},

802

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

803

"metadata": { # Metadata describing a template. # The template metadata describing the template name, available

804

# parameters, etc.

805

"name": "A String", # Required. The name of the template.

806

"parameters": [ # The parameters for the template.

807

{ # Metadata for a specific parameter.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

808

"name": "A String", # Required. The name of the parameter.

809

"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

810

"regexes": [ # Optional. Regexes that the parameter must match.

811

"A String",

812

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

813

"label": "A String", # Required. The label to display for the parameter.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

814

"helpText": "A String", # Required. The help text to display for the parameter.

815

"paramType": "A String", # Optional. The type of the parameter.

816

# Used for selecting input picker.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

817

},

818

],

819

"description": "A String", # Optional. A description of the template.

},

}</pre>

</div>

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

825

<code class="details" id="launch">launch(projectId, body=None, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, location=None, gcsPath=None, validateOnly=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

826

<pre>Launch a template.

827

828

Args:

829

projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

830

body: object, The request body.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

831

The object takes the form of:

832

833

{ # Parameters to provide to the template being launched.

834

"environment": { # The environment values to set at runtime. # The runtime environment for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

835

"workerRegion": "A String", # The Compute Engine region

836

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

837

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

838

# with worker_zone. If neither worker_region nor worker_zone is specified,

839

# default to the control plane's region.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

840

"machineType": "A String", # The machine type to use for the job. Defaults to the value from the

841

# template if not specified.

842

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

843

# the service will use the network "default".

844

"zone": "A String", # The Compute Engine [availability

845

# zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)

846

# for launching worker instances to run your pipeline.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

847

# In the future, worker_zone will take precedence.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

848

"additionalUserLabels": { # Additional user labels to be specified for the job.

849

# Keys and values should follow the restrictions specified in the [labeling

850

# restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)

851

# page.

852

"a_key": "A String",

853

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

854

"numWorkers": 42, # The initial number of Google Compute Engine instances for the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

855

"additionalExperiments": [ # Additional experiment flags for the job.

856

"A String",

857

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

858

"ipConfiguration": "A String", # Configuration for VM IPs.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

859

"bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.

860

# Use with caution.

861

"tempLocation": "A String", # The Cloud Storage path to use for temporary files.

862

# Must be a valid Cloud Storage URL, beginning with `gs://`.

863

"serviceAccountEmail": "A String", # The email address of the service account to run the job as.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

864

"kmsKeyName": "A String", # Optional. Name for the Cloud KMS key for the job.

865

# Key format is:

866

# projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

867

"maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made

868

# available to your pipeline during execution, from 1 to 1000.

869

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

870

# the form "regions/REGION/subnetworks/SUBNETWORK".

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

871

"workerZone": "A String", # The Compute Engine zone

872

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

873

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

874

# with worker_region. If neither worker_region nor worker_zone is specified,

875

# a zone in the control plane's region is chosen based on available capacity.

876

# If both `worker_zone` and `zone` are set, `worker_zone` takes precedence.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

877

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

878

"transformNameMapping": { # Only applicable when updating a pipeline. Map of transform name prefixes of

879

# the job to be replaced to the corresponding name prefixes of the new job.

880

"a_key": "A String",

881

},

882

"update": True or False, # If set, replace the existing pipeline with the name specified by jobName

883

# with this pipeline, preserving state.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

884

"parameters": { # The runtime parameters to pass to the job.

885

"a_key": "A String",

886

},

887

"jobName": "A String", # Required. The job name to use for the created job.

888

}

889

890

dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.

891

The file must be a JSON serialized DynamicTemplateFileSpec object.

892

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.

897

Must be a valid Cloud Storage URL, beginning with `gs://`.

898

location: string, The [regional endpoint]

899

(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to

900

which to direct the request.

901

gcsPath: string, A Cloud Storage path to the template from which to create

902

the job.

903

Must be a valid Cloud Storage URL, beginning with 'gs://'.

904

validateOnly: boolean, If true, the request is validated but not actually executed.

Defaults to false.

Returns:

An object of the form:

909

910

{ # Response to the request to launch a template.

911

"job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and

912

# the job was successfully launched.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

913

"labels": { # User-defined labels for this job.

914

#

915

# The labels map can contain no more than 64 entries. Entries of the labels

916

# map are UTF8 strings that comply with the following restrictions:

917

#

918

# * Keys must conform to regexp: \p{Ll}\p{Lo}{0,62}

919

# * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

920

# * Both keys and values are additionally constrained to be <= 128 bytes in

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

921

# size.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

922

"a_key": "A String",

923

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

924

"jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs

925

# by the metadata values provided here. Populated for ListJobs and all GetJob

926

# views SUMMARY and higher.

927

# ListJob response and Job SUMMARY view.

928

"sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.

929

"versionDisplayName": "A String", # A readable string describing the version of the SDK.

930

"version": "A String", # The version of the SDK used to run the job.

931

"sdkSupportStatus": "A String", # The support status for this SDK version.

932

},

933

"pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.

934

{ # Metadata for a PubSub connector used by the job.

935

"topic": "A String", # Topic accessed in the connection.

936

"subscription": "A String", # Subscription used in the connection.

937

},

938

],

939

"datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.

940

{ # Metadata for a Datastore connector used by the job.

941

"projectId": "A String", # ProjectId accessed in the connection.

942

"namespace": "A String", # Namespace used in the connection.

943

},

944

],

945

"fileDetails": [ # Identification of a File source used in the Dataflow job.

946

{ # Metadata for a File connector used by the job.

947

"filePattern": "A String", # File Pattern used to access files by the connector.

948

},

949

],

950

"spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.

951

{ # Metadata for a Spanner connector used by the job.

952

"instanceId": "A String", # InstanceId accessed in the connection.

953

"projectId": "A String", # ProjectId accessed in the connection.

954

"databaseId": "A String", # DatabaseId accessed in the connection.

955

},

956

],

957

"bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.

958

{ # Metadata for a BigTable connector used by the job.

959

"instanceId": "A String", # InstanceId accessed in the connection.

960

"projectId": "A String", # ProjectId accessed in the connection.

961

"tableId": "A String", # TableId accessed in the connection.

962

},

963

],

964

"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.

965

{ # Metadata for a BigQuery connector used by the job.

966

"projectId": "A String", # Project accessed in the connection.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

967

"query": "A String", # Query used to access data in the connection.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

968

"table": "A String", # Table accessed in the connection.

969

"dataset": "A String", # Dataset accessed in the connection.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

970

},

971

],

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

972

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

973

"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.

974

# A description of the user pipeline and stages through which it is executed.

975

# Created by Cloud Dataflow service. Only retrieved with

976

# JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.

977

# form. This data is provided by the Dataflow service for ease of visualizing

978

# the pipeline and interpreting Dataflow provided metrics.

979

"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.

980

{ # Description of the type, names/ids, and input/outputs for a transform.

981

"kind": "A String", # Type of transform.

982

"name": "A String", # User provided name for this transform instance.

983

"inputCollectionName": [ # User names for all collection inputs to this transform.

984

"A String",

985

],

986

"displayData": [ # Transform-specific display data.

987

{ # Data provided with a pipeline or transform to provide descriptive info.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

988

"key": "A String", # The key identifying the display data.

989

# This is intended to be used as a label for the display data

990

# when viewed in a dax monitoring system.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

991

"shortStrValue": "A String", # A possible additional shorter value to display.

992

# For example a java_class_name_value of com.mypackage.MyDoFn

993

# will be stored with MyDoFn as the short_str_value and

994

# com.mypackage.MyDoFn as the java_class_name value.

995

# short_str_value can be displayed and java_class_name_value

996

# will be displayed as a tooltip.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

997

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

998

"url": "A String", # An optional full URL.

999

"floatValue": 3.14, # Contains value if the data is of float type.

1000

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1001

# language namespace (e.g. a Python module) which defines the display data.

1002

# This allows a dax monitoring system to specially handle the data

1003

# and perform custom rendering.

1004

"javaClassValue": "A String", # Contains value if the data is of java class type.

1005

"label": "A String", # An optional label to display in a dax UI for the element.

1006

"boolValue": True or False, # Contains value if the data is of a boolean type.

1007

"strValue": "A String", # Contains value if the data is of string type.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1008

"durationValue": "A String", # Contains value if the data is of duration type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1009

"int64Value": "A String", # Contains value if the data is of int64 type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1010

},

1011

],

1012

"outputCollectionName": [ # User names for all collection outputs to this transform.

1013

"A String",

1014

],

1015

"id": "A String", # SDK generated id of this transform instance.

1016

},

1017

],

1018

"executionPipelineStage": [ # Description of each stage of execution of the pipeline.

1019

{ # Description of the composing transforms, names/ids, and input/outputs of a

1020

# stage of execution. Some composing transforms and sources may have been

1021

# generated by the Dataflow service during execution planning.

1022

"componentSource": [ # Collections produced and consumed by component transforms of this stage.

1023

{ # Description of an interstitial value between transforms in an execution

1024

# stage.

1025

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1026

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1027

# source is most closely associated.

1028

"name": "A String", # Dataflow service generated name for this source.

1029

},

1030

],

1031

"kind": "A String", # Type of transform this stage is executing.

1032

"name": "A String", # Dataflow service generated name for this stage.

1033

"outputSource": [ # Output sources for this stage.

1034

{ # Description of an input or output of an execution stage.

1035

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1036

"sizeBytes": "A String", # Size of the source, if measurable.

1037

"name": "A String", # Dataflow service generated name for this source.

1038

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1039

# source is most closely associated.

1040

},

1041

],

1042

"inputSource": [ # Input sources for this stage.

1043

{ # Description of an input or output of an execution stage.

1044

"userName": "A String", # Human-readable name for this source; may be user or system generated.

1045

"sizeBytes": "A String", # Size of the source, if measurable.

1046

"name": "A String", # Dataflow service generated name for this source.

1047

"originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this

1048

# source is most closely associated.

1049

},

1050

],

1051

"componentTransform": [ # Transforms that comprise this execution stage.

1052

{ # Description of a transform executed as part of an execution stage.

1053

"userName": "A String", # Human-readable name for this transform; may be user or system generated.

1054

"originalTransform": "A String", # User name for the original user transform with which this transform is

1055

# most closely associated.

1056

"name": "A String", # Dataflow service generated name for this source.

1057

},

1058

],

1059

"id": "A String", # Dataflow service generated id for this stage.

1060

},

1061

],

1062

"displayData": [ # Pipeline level display data.

1063

{ # Data provided with a pipeline or transform to provide descriptive info.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1064

"key": "A String", # The key identifying the display data.

1065

# This is intended to be used as a label for the display data

1066

# when viewed in a dax monitoring system.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1067

"shortStrValue": "A String", # A possible additional shorter value to display.

1068

# For example a java_class_name_value of com.mypackage.MyDoFn

1069

# will be stored with MyDoFn as the short_str_value and

1070

# com.mypackage.MyDoFn as the java_class_name value.

1071

# short_str_value can be displayed and java_class_name_value

1072

# will be displayed as a tooltip.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1073

"timestampValue": "A String", # Contains value if the data is of timestamp type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1074

"url": "A String", # An optional full URL.

1075

"floatValue": 3.14, # Contains value if the data is of float type.

1076

"namespace": "A String", # The namespace for the key. This is usually a class name or programming

1077

# language namespace (i.e. python module) which defines the display data.

1078

# This allows a dax monitoring system to specially handle the data

1079

# and perform custom rendering.

1080

"javaClassValue": "A String", # Contains value if the data is of java class type.

1081

"label": "A String", # An optional label to display in a dax UI for the element.

1082

"boolValue": True or False, # Contains value if the data is of a boolean type.

1083

"strValue": "A String", # Contains value if the data is of string type.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1084

"durationValue": "A String", # Contains value if the data is of duration type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1085

"int64Value": "A String", # Contains value if the data is of int64 type.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"stageStates": [ # This field may be mutated by the Cloud Dataflow service;

1090

# callers cannot mutate it.

1091

{ # A message describing the state of a particular execution stage.

1092

"executionStageName": "A String", # The name of the execution stage.

1093

"executionStageState": "A String", # Execution stage states allow the same set of values as JobState.

1094

"currentStateTime": "A String", # The time at which the stage transitioned to this state.

1095

},

1096

],

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1097

"id": "A String", # The unique ID of this job.

1098

#

1099

# This field is set by the Cloud Dataflow service when the Job is

1100

# created, and is immutable for the life of the job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1101

"replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in

1102

# `JOB_STATE_UPDATED`), this field contains the ID of that job.

1103

"projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.

1104

"transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the

1105

# corresponding name prefixes of the new job.

1106

"a_key": "A String",

1107

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1108

"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1109

"workerRegion": "A String", # The Compute Engine region

1110

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

1111

# which worker processing should occur, e.g. "us-west1". Mutually exclusive

1112

# with worker_zone. If neither worker_region nor worker_zone is specified,

1113

# default to the control plane's region.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1114

"version": { # A structure describing which components and their versions of the service

1115

# are required in order to run the job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1116

"a_key": "", # Properties of the object.

1117

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1118

"flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.

1119

"serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data

1120

# at rest, AKA a Customer Managed Encryption Key (CMEK).

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1121

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1122

# Format:

1123

# projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1124

"internalExperiments": { # Experimental settings.

1125

"a_key": "", # Properties of the object. Contains field @type with type URL.

1126

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1127

"dataset": "A String", # The dataset for the current project where various workflow

1128

# related tables are stored.

1129

#

1130

# The supported resource type is:

1131

#

1132

# Google BigQuery:

1133

# bigquery.googleapis.com/{dataset}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1134

"experiments": [ # The list of experiments to enable.

1135

"A String",

1136

],

Sai Cheemalapati

ea3a5e1

2016-10-12 14:05:53 -0700

[diff] [blame]

1137

"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1138

"sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These

1139

# options are passed through the service and are used to recreate the

1140

# SDK pipeline options on the worker in a language agnostic and platform

1141

# independent way.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1142

"a_key": "", # Properties of the object.

1143

},

1144

"userAgent": { # A description of the process that generated the request.

1145

"a_key": "", # Properties of the object.

1146

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1147

"workerZone": "A String", # The Compute Engine zone

1148

# (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in

1149

# which worker processing should occur, e.g. "us-west1-a". Mutually exclusive

1150

# with worker_region. If neither worker_region nor worker_zone is specified,

1151

# a zone in the control plane's region is chosen based on available capacity.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1152

"workerPools": [ # The worker pools. At least one "harness" worker pool must be

1153

# specified in order for the job to have workers.

1154

{ # Describes one particular pool of Cloud Dataflow workers to be

1155

# instantiated by the Cloud Dataflow service in order to perform the

1156

# computations required by a job. Note that a workflow job may use

1157

# multiple pools, in order to match the various computational

1158

# requirements of the various stages of the job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1159

"workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker

1160

# harness, residing in Google Container Registry.

1161

#

1162

# Deprecated for the Fn API path. Use sdk_harness_container_images instead.

1163

"ipConfiguration": "A String", # Configuration for VM IPs.

1164

"autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.

1165

"maxNumWorkers": 42, # The maximum number of workers to cap scaling at.

1166

"algorithm": "A String", # The algorithm to use for autoscaling.

1167

},

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1168

"diskSourceImage": "A String", # Fully qualified source image for disks.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1169

"network": "A String", # Network to which VMs will be assigned. If empty or unspecified,

1170

# the service will use the network "default".

1171

"zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service

1172

# will attempt to choose a reasonable default.

1173

"metadata": { # Metadata to set on the Google Compute Engine VMs.

1174

"a_key": "A String",

1175

},

1176

"machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the

1177

# service will attempt to choose a reasonable default.

1178

"onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google

1179

# Compute Engine API.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1180

"taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when

1181

# using the standard Dataflow task runner. Users should ignore

1182

# this field.

1183

"workflowFileName": "A String", # The file to store the workflow in.

1184

"logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs

1185

# will not be uploaded.

1186

#

1187

# The supported resource type is:

1188

#

1189

# Google Cloud Storage:

1190

# storage.googleapis.com/{bucket}/{object}

1191

# bucket.storage.googleapis.com/{object}

Sai Cheemalapati

e833b79

2017-03-24 15:06:46 -0700

[diff] [blame]

1192

"commandlinesFileName": "A String", # The file to store preprocessing commands in.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1193

"alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.

1194

"continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.

1195

"baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.

1196

"vmId": "A String", # The ID string of the VM.

1197

"taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by

1198

# taskrunner; e.g. "wheel".

1199

"taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by

1200

# taskrunner; e.g. "root".

1201

"oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to

1202

# access the Cloud Dataflow API.

1203

"A String",

1204

],

1205

"languageHint": "A String", # The suggested backend language.

1206

"logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial

1207

# console.

1208

"streamingWorkerMainClass": "A String", # The streaming worker main class name.

1209

"logDir": "A String", # The directory on the VM to store logs.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1210

"parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.

1211

"reportingEnabled": True or False, # Whether to send work progress updates to the service.

1212

"shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,

1213

# "shuffle/v1beta1".

1214

"workerId": "A String", # The ID of the worker running this pipeline.

1215

"baseUrl": "A String", # The base URL for accessing Google Cloud APIs.

1216

#

1217

# When workers access Google Cloud APIs, they logically do so via

1218

# relative URLs. If this field is specified, it supplies the base

1219

# URL to use for resolving these relative URLs. The normative

1220

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1221

# Locators".

1222

#

1223

# If not specified, the default value is "http://www.googleapis.com/"

1224

"servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,

1225

# "dataflow/v1b3/projects".

1226

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1227

# storage.

1228

#

1229

# The supported resource type is:

1230

#

1231

# Google Cloud Storage:

1232

#

1233

# storage.googleapis.com/{bucket}/{object}

1234

# bucket.storage.googleapis.com/{object}

1235

},

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1236

"dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"

Sai Cheemalapati

e833b79

2017-03-24 15:06:46 -0700

[diff] [blame]

1237

"harnessCommand": "A String", # The command to launch the worker harness.

1238

"tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for

1239

# temporary storage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1240

#

Sai Cheemalapati

e833b79

2017-03-24 15:06:46 -0700

[diff] [blame]

1241

# The supported resource type is:

1242

#

1243

# Google Cloud Storage:

1244

# storage.googleapis.com/{bucket}/{object}

1245

# bucket.storage.googleapis.com/{object}

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1246

"baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.

1247

#

1248

# When workers access Google Cloud APIs, they logically do so via

1249

# relative URLs. If this field is specified, it supplies the base

1250

# URL to use for resolving these relative URLs. The normative

1251

# algorithm used is defined by RFC 1808, "Relative Uniform Resource

1252

# Locators".

1253

#

1254

# If not specified, the default value is "http://www.googleapis.com/"

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1255

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1256

"numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the

1257

# service will choose a number of threads (according to the number of cores

1258

# on the selected machine type for batch, or 1 by convention for streaming).

1259

"poolArgs": { # Extra arguments for this worker pool.

1260

"a_key": "", # Properties of the object. Contains field @type with type URL.

1261

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1262

"packages": [ # Packages to be installed on workers.

1263

{ # The packages that must be installed in order for a worker to run the

1264

# steps of the Cloud Dataflow job that will be assigned to its worker

1265

# pool.

1266

#

1267

# This is the mechanism by which the Cloud Dataflow SDK causes code to

1268

# be loaded onto the workers. For example, the Cloud Dataflow Java SDK

1269

# might use this to install jars containing the user's code and all of the

1270

# various dependencies (libraries, data files, etc.) required in order

1271

# for that code to run.

1272

"location": "A String", # The resource to read the package from. The supported resource type is:

1273

#

1274

# Google Cloud Storage:

1275

#

1276

# storage.googleapis.com/{bucket}

1277

# bucket.storage.googleapis.com/

1278

"name": "A String", # The name of the package.

1279

},

1280

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1281

"defaultPackageSet": "A String", # The default package set to install. This allows the service to

1282

# select a default set of packages which are useful to worker

1283

# harnesses written in a particular language.

1284

"kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`

1285

# are supported.

1286

"diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1287

# attempt to choose a reasonable default.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1288

"teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.

1289

# Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and

1290

# `TEARDOWN_NEVER`.

1291

# `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether

1292

# the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down

1293

# if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn

1294

# down.

1295

#

1296

# If the workers are not torn down by the service, they will

1297

# continue to run and use Google Compute Engine VM resources in the

1298

# user's project until they are explicitly terminated by the user.

1299

# Because of this, Google recommends using the `TEARDOWN_ALWAYS`

1300

# policy except for small, manually supervised test jobs.

1301

#

1302

# If unknown or unspecified, the service will attempt to choose a reasonable

1303

# default.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1304

"diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will

1305

# attempt to choose a reasonable default.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1306

"numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to

1307

# execute the job. If zero or unspecified, the service will

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1308

# attempt to choose a reasonable default.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1309

"subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of

1310

# the form "regions/REGION/subnetworks/SUBNETWORK".

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1311

"dataDisks": [ # Data disks that are used by a VM in this workflow.

1312

{ # Describes the data disk used by a workflow job.

1313

"mountPoint": "A String", # Directory in a VM where disk is mounted.

1314

"sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will

1315

# attempt to choose a reasonable default.

1316

"diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This

1317

# must be a disk type appropriate to the project and zone in which

1318

# the workers will run. If unknown or unspecified, the service

1319

# will attempt to choose a reasonable default.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1320

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1321

# For example, the standard persistent disk type is a resource name

1322

# typically ending in "pd-standard". If SSD persistent disks are

1323

# available, the resource name typically ends with "pd-ssd". The

1324

# actual valid values are defined by the Google Compute Engine API,

1325

# not by the Cloud Dataflow API; consult the Google Compute Engine

1326

# documentation for more information about determining the set of

1327

# available disk types for a particular project and zone.

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1328

#

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1329

# Google Compute Engine Disk types are local to a particular

1330

# project in a particular zone, and so the resource name will

1331

# typically look something like this:

1332

#

1333

# compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard

Sai Cheemalapati

2017-06-06 18:46:08 -0400

[diff] [blame]

1334

},

1335

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1336

"sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will

1337

# only be set in the Fn API path. For non-cross-language pipelines this

1338

# should have only one entry. Cross-language pipelines will have two or more

1339

# entries.

1340

{ # Defines a SDK harness container for executing Dataflow pipelines.

1341

"containerImage": "A String", # A docker container image that resides in Google Container Registry.

1342

"useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK

1343

# container instance with this image. If false (or unset) recommends using

1344

# more than one core per SDK container instance with this image for

1345

# efficiency. Note that Dataflow service may choose to override this property

1346

# if needed.

1347

},

1348

],

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1349

},

1350

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1351

"clusterManagerApiService": "A String", # The type of cluster manager API to use. If unknown or

1352

# unspecified, the service will attempt to choose a reasonable

1353

# default. This should be in the form of the API service name,

1354

# e.g. "compute.googleapis.com".

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1355

"tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary

1356

# storage. The system will append the suffix "/temp-{JOBNAME}" to

1357

# this resource prefix, where {JOBNAME} is the value of the

1358

# job_name field. The resulting bucket and object prefix is used

1359

# as the prefix of the resources used to store temporary data

1360

# needed during the job execution. NOTE: This will override the

1361

# value in taskrunner_settings.

1362

# The supported resource type is:

1363

#

1364

# Google Cloud Storage:

1365

#

1366

# storage.googleapis.com/{bucket}/{object}

1367

# bucket.storage.googleapis.com/{object}

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1368

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1369

"location": "A String", # The [regional endpoint]

1370

# (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that

1371

# contains this job.

1372

"tempFiles": [ # A set of files the system should be aware of that are used

1373

# for temporary storage. These temporary files will be

1374

# removed on job completion.

1375

# No duplicates are allowed.

1376

# No file patterns are supported.

1377

#

1378

# The supported files are:

1379

#

1380

# Google Cloud Storage:

1381

#

1382

# storage.googleapis.com/{bucket}/{object}

1383

# bucket.storage.googleapis.com/{object}

1384

"A String",

1385

],

1386

"type": "A String", # The type of Cloud Dataflow job.

1387

"clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.

1388

# If this field is set, the service will ensure its uniqueness.

1389

# The request to create a job will fail if the service has knowledge of a

1390

# previously submitted job with the same client's ID and job name.

1391

# The caller may use this field to ensure idempotence of job

1392

# creation across retried attempts to create a job.

1393

# By default, the field is empty and, in that case, the service ignores it.

1394

"createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given

1395

# snapshot.

1396

"stepsLocation": "A String", # The GCS location where the steps are stored.

1397

"currentStateTime": "A String", # The timestamp associated with the current state.

1398

"startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).

1399

# Flexible resource scheduling jobs are started with some delay after job

1400

# creation, so start_time is unset before start and is updated when the

1401

# job is started by the Cloud Dataflow service. For other jobs, start_time

1402

# is always equal to create_time and is immutable and set by the Cloud Dataflow

1403

# service.

1404

"createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the

1405

# Cloud Dataflow service.

1406

"requestedState": "A String", # The job's requested state.

1407

#

1408

# `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and

1409

# `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may

1410

# also be used to directly set a job's requested state to

1411

# `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the

1412

# job if it has not already reached a terminal state.

1413

"name": "A String", # The user-specified Cloud Dataflow job name.

1414

#

1415

# Only one Job with a given name may exist in a project at any

1416

# given time. If a caller attempts to create a Job with the same

1417

# name as an already-existing Job, the attempt returns the

1418

# existing Job.

1419

#

1420

# The name must match the regular expression

1421

# `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`

1422

"steps": [ # Exactly one of steps or steps_location should be specified.

1423

#

1424

# The top-level steps that constitute the entire job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1425

{ # Defines a particular step within a Cloud Dataflow job.

1426

#

1427

# A job consists of multiple steps, each of which performs some

1428

# specific operation as part of the overall job. Data is typically

1429

# passed from one step to another as part of the job.

1430

#

1431

# Here's an example of a sequence of steps which together implement a

1432

# Map-Reduce job:

1433

#

1434

# * Read a collection of data from some source, parsing the

1435

# collection's elements.

1436

#

1437

# * Validate the elements.

1438

#

1439

# * Apply a user-defined function to map each element to some value

1440

# and extract an element-specific key value.

1441

#

1442

# * Group elements with the same key into a single element with

1443

# that key, transforming a multiply-keyed collection into a

1444

# uniquely-keyed collection.

1445

#

1446

# * Write the elements out to some data sink.

1447

#

1448

# Note that the Cloud Dataflow service may be used to run many different

1449

# types of jobs, not just Map-Reduce.

1450

"kind": "A String", # The kind of step in the Cloud Dataflow job.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame^]

1451

"name": "A String", # The name that identifies the step. This must be unique for each

1452

# step with respect to all other steps in the Cloud Dataflow job.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1453

"properties": { # Named properties associated with the step. Each kind of

1454

# predefined step has its own required set of properties.

1455

# Must be provided on Create. Only retrieved with JOB_VIEW_ALL.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1456

"a_key": "", # Properties of the object.

1457

},

1458

},

1459

],

Thomas Coffee

2f24537

2017-03-27 10:39:26 -0700

[diff] [blame]

1460

"replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID

1461

# of the job it replaced.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1462

#

Thomas Coffee

2f24537

2017-03-27 10:39:26 -0700

[diff] [blame]

1463

# When sending a `CreateJobRequest`, you can update a job by specifying it

1464

# here. The job named here is stopped, and its intermediate state is

1465

# transferred to this job.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1466

"currentState": "A String", # The current state of the job.

1467

#

1468

# Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise

1469

# specified.

1470

#

1471

# A job in the `JOB_STATE_RUNNING` state may asynchronously enter a

1472

# terminal state. After a job has reached a terminal state, no

1473

# further state updates may be made.

1474

#

1475

# This field may be mutated by the Cloud Dataflow service;

1476

# callers cannot mutate it.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1477

"executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.

1478

# isn't contained in the submitted job.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

1479

"stages": { # A mapping from each stage to the information about that stage.

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1480

"a_key": { # Contains information about how a particular

1481

# google.dataflow.v1beta3.Step will be executed.

1482

"stepName": [ # The steps associated with the execution stage.

1483

# Note that stages may have several steps, and that a given step

1484

# might be run by more than one stage.

Jon Wayne Parrott

2016-08-16 12:44:29 -0700

[diff] [blame]

"A String",

],

},

},

},

Sai Cheemalapati

2017-03-13 12:12:03 -0400

[diff] [blame]

1490

},

Sai Cheemalapati