docs: update docs (#916)
* fix: re-run script
* test: fix noxfile
diff --git a/docs/dyn/dataflow_v1b3.projects.locations.flexTemplates.html b/docs/dyn/dataflow_v1b3.projects.locations.flexTemplates.html
index aaa07be..3aee8db 100644
--- a/docs/dyn/dataflow_v1b3.projects.locations.flexTemplates.html
+++ b/docs/dyn/dataflow_v1b3.projects.locations.flexTemplates.html
@@ -94,9 +94,14 @@
"validateOnly": True or False, # If true, the request is validated but not actually executed.
# Defaults to false.
"launchParameter": { # Launch FlexTemplate Parameter. # Required. Parameter to launch a job form Flex Template.
+ "containerSpecGcsPath": "A String", # Gcs path to a file with json serialized ContainerSpec as content.
+ "parameters": { # The parameters for FlexTemplate.
+ # Ex. {"num_workers":"5"}
+ "a_key": "A String",
+ },
+ "jobName": "A String", # Required. The job name to use for the created job.
"containerSpec": { # Container Spec. # Spec about the container image to launch.
"metadata": { # Metadata describing a template. # Metadata describing a template including description and validation rules.
- "name": "A String", # Required. The name of the template.
"parameters": [ # The parameters for the template.
{ # Metadata for a specific parameter.
"label": "A String", # Required. The label to display for the parameter.
@@ -110,6 +115,7 @@
"isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
},
],
+ "name": "A String", # Required. The name of the template.
"description": "A String", # Optional. A description of the template.
},
"sdkInfo": { # SDK Information. # Required. SDK info of the Flex Template.
@@ -118,12 +124,6 @@
},
"image": "A String", # Name of the docker container image. E.g., gcr.io/project/some-image
},
- "containerSpecGcsPath": "A String", # Gcs path to a file with json serialized ContainerSpec as content.
- "parameters": { # The parameters for FlexTemplate.
- # Ex. {"num_workers":"5"}
- "a_key": "A String",
- },
- "jobName": "A String", # Required. The job name to use for the created job.
},
}
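For reference, a minimal sketch of launching a Flex Template through the regenerated Python client, using the request body documented above. The project ID, region, GCS path, and parameter values are hypothetical placeholders.

# Sketch: launch a Dataflow Flex Template via the generated v1b3 client.
# Project, region, GCS path, and parameter values below are hypothetical.
from googleapiclient.discovery import build

dataflow = build("dataflow", "v1b3")  # uses application-default credentials

body = {
    "validateOnly": False,
    "launchParameter": {
        "jobName": "example-flex-job",
        "containerSpecGcsPath": "gs://example-bucket/templates/spec.json",
        "parameters": {"num_workers": "5"},
    },
}

response = (
    dataflow.projects()
    .locations()
    .flexTemplates()
    .launch(projectId="example-project", location="us-central1", body=body)
    .execute()
)
print(response.get("job", {}).get("id"))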
@@ -155,219 +155,6 @@
"a_key": "A String",
},
"environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
- "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
- # options are passed through the service and are used to recreate the
- # SDK pipeline options on the worker in a language agnostic and platform
- # independent way.
- "a_key": "", # Properties of the object.
- },
- "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
- "workerPools": [ # The worker pools. At least one "harness" worker pool must be
- # specified in order for the job to have workers.
- { # Describes one particular pool of Cloud Dataflow workers to be
- # instantiated by the Cloud Dataflow service in order to perform the
- # computations required by a job. Note that a workflow job may use
- # multiple pools, in order to match the various computational
- # requirements of the various stages of the job.
- "defaultPackageSet": "A String", # The default package set to install. This allows the service to
- # select a default set of packages which are useful to worker
- # harnesses written in a particular language.
- "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
- # the service will use the network "default".
- "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
- # will attempt to choose a reasonable default.
- "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
- # execute the job. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
- # service will choose a number of threads (according to the number of cores
- # on the selected machine type for batch, or 1 by convention for streaming).
- "diskSourceImage": "A String", # Fully qualified source image for disks.
- "packages": [ # Packages to be installed on workers.
- { # The packages that must be installed in order for a worker to run the
- # steps of the Cloud Dataflow job that will be assigned to its worker
- # pool.
- #
- # This is the mechanism by which the Cloud Dataflow SDK causes code to
- # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
- # might use this to install jars containing the user's code and all of the
- # various dependencies (libraries, data files, etc.) required in order
- # for that code to run.
- "location": "A String", # The resource to read the package from. The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}
- # bucket.storage.googleapis.com/
- "name": "A String", # The name of the package.
- },
- ],
- "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
- # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
- # `TEARDOWN_NEVER`.
- # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
- # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
- # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
- # down.
- #
- # If the workers are not torn down by the service, they will
- # continue to run and use Google Compute Engine VM resources in the
- # user's project until they are explicitly terminated by the user.
- # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
- # policy except for small, manually supervised test jobs.
- #
- # If unknown or unspecified, the service will attempt to choose a reasonable
- # default.
- "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
- # Compute Engine API.
- "poolArgs": { # Extra arguments for this worker pool.
- "a_key": "", # Properties of the object. Contains field @type with type URL.
- },
- "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
- # harness, residing in Google Container Registry.
- #
- # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
- "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
- # attempt to choose a reasonable default.
- "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
- # service will attempt to choose a reasonable default.
- "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
- # are supported.
- "dataDisks": [ # Data disks that are used by a VM in this workflow.
- { # Describes the data disk used by a workflow job.
- "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
- # attempt to choose a reasonable default.
- "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
- # must be a disk type appropriate to the project and zone in which
- # the workers will run. If unknown or unspecified, the service
- # will attempt to choose a reasonable default.
- #
- # For example, the standard persistent disk type is a resource name
- # typically ending in "pd-standard". If SSD persistent disks are
- # available, the resource name typically ends with "pd-ssd". The
- # actual valid values are defined the Google Compute Engine API,
- # not by the Cloud Dataflow API; consult the Google Compute Engine
- # documentation for more information about determining the set of
- # available disk types for a particular project and zone.
- #
- # Google Compute Engine Disk types are local to a particular
- # project in a particular zone, and so the resource name will
- # typically look something like this:
- #
- # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
- "mountPoint": "A String", # Directory in a VM where disk is mounted.
- },
- ],
- "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
- # only be set in the Fn API path. For non-cross-language pipelines this
- # should have only one entry. Cross-language pipelines will have two or more
- # entries.
- { # Defines a SDK harness container for executing Dataflow pipelines.
- "containerImage": "A String", # A docker container image that resides in Google Container Registry.
- "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
- # container instance with this image. If false (or unset) recommends using
- # more than one core per SDK container instance with this image for
- # efficiency. Note that Dataflow service may choose to override this property
- # if needed.
- },
- ],
- "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
- # the form "regions/REGION/subnetworks/SUBNETWORK".
- "ipConfiguration": "A String", # Configuration for VM IPs.
- "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
- # using the standard Dataflow task runner. Users should ignore
- # this field.
- "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
- "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "wheel".
- "harnessCommand": "A String", # The command to launch the worker harness.
- "logDir": "A String", # The directory on the VM to store logs.
- "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
- # access the Cloud Dataflow API.
- "A String",
- ],
- "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
- "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
- # will not be uploaded.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
- "streamingWorkerMainClass": "A String", # The streaming worker main class name.
- "workflowFileName": "A String", # The file to store the workflow in.
- "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
- "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
- # temporary storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
- "commandlinesFileName": "A String", # The file to store preprocessing commands in.
- "languageHint": "A String", # The suggested backend language.
- "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
- # console.
- "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
- "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
- "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
- #
- # When workers access Google Cloud APIs, they logically do so via
- # relative URLs. If this field is specified, it supplies the base
- # URL to use for resolving these relative URLs. The normative
- # algorithm used is defined by RFC 1808, "Relative Uniform Resource
- # Locators".
- #
- # If not specified, the default value is "http://www.googleapis.com/"
- "reportingEnabled": True or False, # Whether to send work progress updates to the service.
- "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
- # "dataflow/v1b3/projects".
- "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
- # "shuffle/v1beta1".
- "workerId": "A String", # The ID of the worker running this pipeline.
- "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
- # storage.
- #
- # The supported resource type is:
- #
- # Google Cloud Storage:
- #
- # storage.googleapis.com/{bucket}/{object}
- # bucket.storage.googleapis.com/{object}
- },
- "vmId": "A String", # The ID string of the VM.
- "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
- # taskrunner; e.g. "root".
- },
- "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
- "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
- "algorithm": "A String", # The algorithm to use for autoscaling.
- },
- "metadata": { # Metadata to set on the Google Compute Engine VMs.
- "a_key": "A String",
- },
- },
- ],
- "dataset": "A String", # The dataset for the current project where various workflow
- # related tables are stored.
- #
- # The supported resource type is:
- #
- # Google BigQuery:
- # bigquery.googleapis.com/{dataset}
"internalExperiments": { # Experimental settings.
"a_key": "", # Properties of the object. Contains field @type with type URL.
},
@@ -414,50 +201,249 @@
"a_key": "", # Properties of the object.
},
"serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
+ "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
+ # options are passed through the service and are used to recreate the
+ # SDK pipeline options on the worker in a language agnostic and platform
+ # independent way.
+ "a_key": "", # Properties of the object.
+ },
+ "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
+ "workerPools": [ # The worker pools. At least one "harness" worker pool must be
+ # specified in order for the job to have workers.
+ { # Describes one particular pool of Cloud Dataflow workers to be
+ # instantiated by the Cloud Dataflow service in order to perform the
+ # computations required by a job. Note that a workflow job may use
+ # multiple pools, in order to match the various computational
+ # requirements of the various stages of the job.
+ "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
+ # service will choose a number of threads (according to the number of cores
+ # on the selected machine type for batch, or 1 by convention for streaming).
+ "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
+ # execute the job. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "zone": "A String", # Zone to run the worker pools in. If empty or unspecified, the service
+ # will attempt to choose a reasonable default.
+ "diskSourceImage": "A String", # Fully qualified source image for disks.
+ "packages": [ # Packages to be installed on workers.
+ { # The packages that must be installed in order for a worker to run the
+ # steps of the Cloud Dataflow job that will be assigned to its worker
+ # pool.
+ #
+ # This is the mechanism by which the Cloud Dataflow SDK causes code to
+ # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
+ # might use this to install jars containing the user's code and all of the
+ # various dependencies (libraries, data files, etc.) required in order
+ # for that code to run.
+ "name": "A String", # The name of the package.
+ "location": "A String", # The resource to read the package from. The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}
+ # bucket.storage.googleapis.com/
+ },
+ ],
+ "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
+ # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
+ # `TEARDOWN_NEVER`.
+ # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
+ # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
+ # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
+ # down.
+ #
+ # If the workers are not torn down by the service, they will
+ # continue to run and use Google Compute Engine VM resources in the
+ # user's project until they are explicitly terminated by the user.
+ # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
+ # policy except for small, manually supervised test jobs.
+ #
+ # If unknown or unspecified, the service will attempt to choose a reasonable
+ # default.
+ "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
+ # Compute Engine API.
+ "poolArgs": { # Extra arguments for this worker pool.
+ "a_key": "", # Properties of the object. Contains field @type with type URL.
+ },
+ "diskSizeGb": 42, # Size of root disk for VMs, in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
+ # harness, residing in Google Container Registry.
+ #
+ # Deprecated for the Fn API path. Use sdk_harness_container_images instead.
+ "diskType": "A String", # Type of root disk for VMs. If empty or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "machineType": "A String", # Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
+ # service will attempt to choose a reasonable default.
+ "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
+ # are supported.
+ "sdkHarnessContainerImages": [ # Set of SDK harness containers needed to execute this pipeline. This will
+ # only be set in the Fn API path. For non-cross-language pipelines this
+ # should have only one entry. Cross-language pipelines will have two or more
+ # entries.
+ { # Defines a SDK harness container for executing Dataflow pipelines.
+ "containerImage": "A String", # A docker container image that resides in Google Container Registry.
+ "useSingleCorePerContainer": True or False, # If true, recommends the Dataflow service to use only one core per SDK
+ # container instance with this image. If false (or unset) recommends using
+ # more than one core per SDK container instance with this image for
+ # efficiency. Note that Dataflow service may choose to override this property
+ # if needed.
+ },
+ ],
+ "dataDisks": [ # Data disks that are used by a VM in this workflow.
+ { # Describes the data disk used by a workflow job.
+ "diskType": "A String", # Disk storage type, as defined by Google Compute Engine. This
+ # must be a disk type appropriate to the project and zone in which
+ # the workers will run. If unknown or unspecified, the service
+ # will attempt to choose a reasonable default.
+ #
+ # For example, the standard persistent disk type is a resource name
+ # typically ending in "pd-standard". If SSD persistent disks are
+ # available, the resource name typically ends with "pd-ssd". The
+ # actual valid values are defined the Google Compute Engine API,
+ # not by the Cloud Dataflow API; consult the Google Compute Engine
+ # documentation for more information about determining the set of
+ # available disk types for a particular project and zone.
+ #
+ # Google Compute Engine Disk types are local to a particular
+ # project in a particular zone, and so the resource name will
+ # typically look something like this:
+ #
+ # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
+ "sizeGb": 42, # Size of disk in GB. If zero or unspecified, the service will
+ # attempt to choose a reasonable default.
+ "mountPoint": "A String", # Directory in a VM where disk is mounted.
+ },
+ ],
+ "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired. Expected to be of
+ # the form "regions/REGION/subnetworks/SUBNETWORK".
+ "ipConfiguration": "A String", # Configuration for VM IPs.
+ "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
+ # using the standard Dataflow task runner. Users should ignore
+ # this field.
+ "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
+ "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "wheel".
+ "harnessCommand": "A String", # The command to launch the worker harness.
+ "logDir": "A String", # The directory on the VM to store logs.
+ "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
+ # access the Cloud Dataflow API.
+ "A String",
+ ],
+ "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
+ "logUploadLocation": "A String", # Indicates where to put logs. If this is not specified, the logs
+ # will not be uploaded.
+ #
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
+ "streamingWorkerMainClass": "A String", # The streaming worker main class name.
+ "workflowFileName": "A String", # The file to store the workflow in.
+ "languageHint": "A String", # The suggested backend language.
+ "commandlinesFileName": "A String", # The file to store preprocessing commands in.
+ "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
+ "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
+ # temporary storage.
+ #
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
+ "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
+ "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
+ # console.
+ "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
+ "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
+ "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
+ # storage.
+ #
+ # The supported resource type is:
+ #
+ # Google Cloud Storage:
+ #
+ # storage.googleapis.com/{bucket}/{object}
+ # bucket.storage.googleapis.com/{object}
+ "reportingEnabled": True or False, # Whether to send work progress updates to the service.
+ "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
+ #
+ # When workers access Google Cloud APIs, they logically do so via
+ # relative URLs. If this field is specified, it supplies the base
+ # URL to use for resolving these relative URLs. The normative
+ # algorithm used is defined by RFC 1808, "Relative Uniform Resource
+ # Locators".
+ #
+ # If not specified, the default value is "http://www.googleapis.com/"
+ "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
+ # "dataflow/v1b3/projects".
+ "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
+ # "shuffle/v1beta1".
+ "workerId": "A String", # The ID of the worker running this pipeline.
+ },
+ "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
+ # taskrunner; e.g. "root".
+ "vmId": "A String", # The ID string of the VM.
+ },
+ "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
+ "algorithm": "A String", # The algorithm to use for autoscaling.
+ "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
+ },
+ "metadata": { # Metadata to set on the Google Compute Engine VMs.
+ "a_key": "A String",
+ },
+ "defaultPackageSet": "A String", # The default package set to install. This allows the service to
+ # select a default set of packages which are useful to worker
+ # harnesses written in a particular language.
+ "network": "A String", # Network to which VMs will be assigned. If empty or unspecified,
+ # the service will use the network "default".
+ },
+ ],
+ "dataset": "A String", # The dataset for the current project where various workflow
+ # related tables are stored.
+ #
+ # The supported resource type is:
+ #
+ # Google BigQuery:
+ # bigquery.googleapis.com/{dataset}
},
"stageStates": [ # This field may be mutated by the Cloud Dataflow service;
# callers cannot mutate it.
{ # A message describing the state of a particular execution stage.
- "executionStageName": "A String", # The name of the execution stage.
"currentStateTime": "A String", # The time at which the stage transitioned to this state.
"executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
+ "executionStageName": "A String", # The name of the execution stage.
},
],
"jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
# by the metadata values provided here. Populated for ListJobs and all GetJob
# views SUMMARY and higher.
# ListJob response and Job SUMMARY view.
- "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
- { # Metadata for a BigTable connector used by the job.
- "tableId": "A String", # TableId accessed in the connection.
- "projectId": "A String", # ProjectId accessed in the connection.
- "instanceId": "A String", # InstanceId accessed in the connection.
- },
- ],
- "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
- { # Metadata for a Spanner connector used by the job.
- "databaseId": "A String", # DatabaseId accessed in the connection.
- "instanceId": "A String", # InstanceId accessed in the connection.
- "projectId": "A String", # ProjectId accessed in the connection.
- },
- ],
"datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
{ # Metadata for a Datastore connector used by the job.
- "projectId": "A String", # ProjectId accessed in the connection.
"namespace": "A String", # Namespace used in the connection.
+ "projectId": "A String", # ProjectId accessed in the connection.
},
],
"sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
- "versionDisplayName": "A String", # A readable string describing the version of the SDK.
- "sdkSupportStatus": "A String", # The support status for this SDK version.
"version": "A String", # The version of the SDK used to run the job.
+ "sdkSupportStatus": "A String", # The support status for this SDK version.
+ "versionDisplayName": "A String", # A readable string describing the version of the SDK.
},
"bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
{ # Metadata for a BigQuery connector used by the job.
"table": "A String", # Table accessed in the connection.
"dataset": "A String", # Dataset accessed in the connection.
- "projectId": "A String", # Project accessed in the connection.
"query": "A String", # Query used to access data in the connection.
+ "projectId": "A String", # Project accessed in the connection.
},
],
"fileDetails": [ # Identification of a File source used in the Dataflow job.
@@ -467,15 +453,29 @@
],
"pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
{ # Metadata for a PubSub connector used by the job.
- "subscription": "A String", # Subscription used in the connection.
"topic": "A String", # Topic accessed in the connection.
+ "subscription": "A String", # Subscription used in the connection.
+ },
+ ],
+ "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
+ { # Metadata for a BigTable connector used by the job.
+ "projectId": "A String", # ProjectId accessed in the connection.
+ "instanceId": "A String", # InstanceId accessed in the connection.
+ "tableId": "A String", # TableId accessed in the connection.
+ },
+ ],
+ "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
+ { # Metadata for a Spanner connector used by the job.
+ "instanceId": "A String", # InstanceId accessed in the connection.
+ "projectId": "A String", # ProjectId accessed in the connection.
+ "databaseId": "A String", # DatabaseId accessed in the connection.
},
],
},
+ "type": "A String", # The type of Cloud Dataflow job.
+ "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
"createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
# snapshot.
- "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
- "type": "A String", # The type of Cloud Dataflow job.
"pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
# A description of the user pipeline and stages through which it is executed.
# Created by Cloud Dataflow service. Only retrieved with
@@ -486,6 +486,25 @@
{ # Description of the composing transforms, names/ids, and input/outputs of a
# stage of execution. Some composing transforms and sources may have been
# generated by the Dataflow service during execution planning.
+ "outputSource": [ # Output sources for this stage.
+ { # Description of an input or output of an execution stage.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
+ "userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
+ # source is most closely associated.
+ },
+ ],
+ "name": "A String", # Dataflow service generated name for this stage.
+ "inputSource": [ # Input sources for this stage.
+ { # Description of an input or output of an execution stage.
+ "sizeBytes": "A String", # Size of the source, if measurable.
+ "name": "A String", # Dataflow service generated name for this source.
+ "userName": "A String", # Human-readable name for this source; may be user or system generated.
+ "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
+ # source is most closely associated.
+ },
+ ],
"id": "A String", # Dataflow service generated id for this stage.
"componentTransform": [ # Transforms that comprise this execution stage.
{ # Description of a transform executed as part of an execution stage.
@@ -505,25 +524,6 @@
},
],
"kind": "A String", # Type of tranform this stage is executing.
- "outputSource": [ # Output sources for this stage.
- { # Description of an input or output of an execution stage.
- "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
- # source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
- "userName": "A String", # Human-readable name for this source; may be user or system generated.
- },
- ],
- "name": "A String", # Dataflow service generated name for this stage.
- "inputSource": [ # Input sources for this stage.
- { # Description of an input or output of an execution stage.
- "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
- # source is most closely associated.
- "name": "A String", # Dataflow service generated name for this source.
- "sizeBytes": "A String", # Size of the source, if measurable.
- "userName": "A String", # Human-readable name for this source; may be user or system generated.
- },
- ],
},
],
"originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
@@ -536,12 +536,8 @@
"id": "A String", # SDK generated id of this transform instance.
"displayData": [ # Transform-specific display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
- "boolValue": True or False, # Contains value if the data is of a boolean type.
- "javaClassValue": "A String", # Contains value if the data is of java class type.
- "strValue": "A String", # Contains value if the data is of string type.
- "int64Value": "A String", # Contains value if the data is of int64 type.
"durationValue": "A String", # Contains value if the data is of duration type.
+ "int64Value": "A String", # Contains value if the data is of int64 type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
# language namespace (i.e. python module) which defines the display data.
# This allows a dax monitoring system to specially handle the data
@@ -558,6 +554,10 @@
# will be displayed as a tooltip.
"url": "A String", # An optional full URL.
"label": "A String", # An optional label to display in a dax UI for the element.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "boolValue": True or False, # Contains value if the data is of a boolean type.
+ "javaClassValue": "A String", # Contains value if the data is of java class type.
+ "strValue": "A String", # Contains value if the data is of string type.
},
],
"outputCollectionName": [ # User names for all collection outputs to this transform.
@@ -567,12 +567,8 @@
],
"displayData": [ # Pipeline level display data.
{ # Data provided with a pipeline or transform to provide descriptive info.
- "timestampValue": "A String", # Contains value if the data is of timestamp type.
- "boolValue": True or False, # Contains value if the data is of a boolean type.
- "javaClassValue": "A String", # Contains value if the data is of java class type.
- "strValue": "A String", # Contains value if the data is of string type.
- "int64Value": "A String", # Contains value if the data is of int64 type.
"durationValue": "A String", # Contains value if the data is of duration type.
+ "int64Value": "A String", # Contains value if the data is of int64 type.
"namespace": "A String", # The namespace for the key. This is usually a class name or programming
# language namespace (i.e. python module) which defines the display data.
# This allows a dax monitoring system to specially handle the data
@@ -589,6 +585,10 @@
# will be displayed as a tooltip.
"url": "A String", # An optional full URL.
"label": "A String", # An optional label to display in a dax UI for the element.
+ "timestampValue": "A String", # Contains value if the data is of timestamp type.
+ "boolValue": True or False, # Contains value if the data is of a boolean type.
+ "javaClassValue": "A String", # Contains value if the data is of java class type.
+ "strValue": "A String", # Contains value if the data is of string type.
},
],
},
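As a companion to the Job fields documented above, a minimal sketch of reading a few of them back with jobs.get. The project, region, and job ID are hypothetical, and many fields are only populated for certain job views or job types.

# Sketch: fetch a job and inspect fields from the Job schema shown above.
# Project, region, and jobId are hypothetical placeholders.
from googleapiclient.discovery import build

dataflow = build("dataflow", "v1b3")

job = (
    dataflow.projects()
    .locations()
    .jobs()
    .get(
        projectId="example-project",
        location="us-central1",
        jobId="2020-01-01_00_00_00-123456789012345678",
        view="JOB_VIEW_ALL",
    )
    .execute()
)

sdk = job.get("jobMetadata", {}).get("sdkVersion", {})
print("SDK:", sdk.get("version"), sdk.get("sdkSupportStatus"))

for pool in job.get("environment", {}).get("workerPools", []):
    print("pool:", pool.get("kind"), pool.get("machineType"), pool.get("numWorkers"))

for stage in job.get("stageStates", []):
    print(stage.get("executionStageName"), stage.get("executionStageState"))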