Clean and regen docs (#401)
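
The regenerated pages below document the Job resource shared by the projects.jobs methods (submit, get, list, cancel); the diff mostly reorders fields, moving the hadoopJob, labels, placement, reference, yarnApplications, and loggingConfiguration blocks. For orientation while reading the schema, here is a minimal sketch of submitting a Hadoop job with this client. The project ID, cluster name, bucket, and jar path are placeholder assumptions, not values from this change; the call shape follows the submit(projectId, body) method documented in the regenerated file.

    # A hedged sketch, not part of this change: submit a Hadoop job using the
    # schema documented below. 'my-project', 'my-cluster', and the gs:// URIs
    # are placeholder assumptions; application default credentials are assumed.
    from googleapiclient import discovery

    service = discovery.build('dataproc', 'v1beta1')

    job_body = {
        'job': {
            # Required -- where to run the job (see "placement" in the schema).
            'placement': {'clusterName': 'my-cluster'},
            # Job is a Hadoop job (see "hadoopJob" in the schema).
            'hadoopJob': {
                'mainJarFileUri': 'gs://my-bucket/my-job.jar',
                'args': ['gs://my-bucket/input', 'gs://my-bucket/output'],
            },
        }
    }

    submitted = service.projects().jobs().submit(
        projectId='my-project', body=job_body).execute()

    # The server generates reference.jobId when one is not supplied (see the
    # "reference" block added below).
    print(submitted['reference']['jobId'], submitted['status']['state'])
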
diff --git a/docs/dyn/dataproc_v1beta1.projects.jobs.html b/docs/dyn/dataproc_v1beta1.projects.jobs.html
index 22a5584..15b1499 100644
--- a/docs/dyn/dataproc_v1beta1.projects.jobs.html
+++ b/docs/dyn/dataproc_v1beta1.projects.jobs.html
@@ -124,6 +124,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -155,15 +190,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -178,54 +225,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -283,10 +287,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -371,6 +371,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -402,15 +437,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -425,54 +472,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -530,10 +534,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -602,6 +602,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -633,15 +668,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -656,54 +703,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -761,10 +765,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -837,6 +837,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -868,15 +903,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -891,54 +938,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -996,10 +1000,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -1054,6 +1054,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -1085,15 +1120,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -1108,54 +1155,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
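+ # Example (illustrative): a pigJob body assembled from the fields above;
+ # the bucket paths and script variable are hypothetical.
+ #
+ #   pig_job = {
+ #       'queryFileUri': 'gs://my-bucket/scripts/report.pig',
+ #       'scriptVariables': {'DATE': '2017-01-01'},  # passed as DATE=2017-01-01
+ #       'continueOnFailure': True,  # keep running independent queries on failure
+ #       'jarFileUris': ['gs://my-bucket/udfs/my-udfs.jar'],  # may contain Pig UDFs
+ #   }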
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -1213,10 +1217,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
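+ # Example (illustrative): opting into the beta restartable-driver behavior
+ # described above, reusing job_body from the submit sketch; any value up to
+ # the documented cap of 10 is accepted.
+ #
+ #   job_body['job']['scheduling'] = {'maxFailuresPerHour': 5}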
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -1273,6 +1273,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
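+ # Example (illustrative): a client-side check of a user-supplied job ID
+ # against the constraints stated above (letters, digits, underscores,
+ # hyphens; at most 100 characters).
+ #
+ #   import re
+ #
+ #   JOB_ID_RE = re.compile(r'^[a-zA-Z0-9_-]{1,100}$')
+ #
+ #   def is_valid_job_id(job_id):
+ #       return bool(JOB_ID_RE.match(job_id))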
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -1304,15 +1339,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -1327,54 +1374,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
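+ # Example (illustrative): a loggingConfiguration built from the sample
+ # levels quoted in the driverLogLevels description; 'root' addresses the
+ # root logger.
+ #
+ #   logging_configuration = {
+ #       'driverLogLevels': {
+ #           'root': 'INFO',
+ #           'com.google': 'FATAL',
+ #           'org.apache': 'DEBUG',
+ #       },
+ #   }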
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -1432,10 +1436,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.
@@ -1490,6 +1490,41 @@
"substate": "A String", # Output-only Additional state information, which includes status reported by the agent.
"details": "A String", # Optional Job state details, such as an error description if the state is <code>ERROR</code>.
},
+ "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
+ "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
+ "A String",
+ ],
+ "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
+ "A String",
+ ],
+ "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
+ "A String",
+ ],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
+ "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
+ "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
+ "A String",
+ ],
+ "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
+ "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
+ "a_key": "A String",
+ },
+ },
+ "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
+ "a_key": "A String",
+ },
+ "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
+ "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
+ "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
+ },
+ "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
+ "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
+ "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
+ },
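+ # Example (illustrative): the REST path a fully-specified reference
+ # implies; the 'projects/{projectId}/jobs/{jobId}' shape is an assumption
+ # about this API version, not confirmed by this page.
+ #
+ #   def job_rest_path(ref):
+ #       return 'projects/%s/jobs/%s' % (ref['projectId'], ref['jobId'])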
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Spark SQL command: SET name="value";).
@@ -1521,15 +1556,27 @@
"a_key": "A String",
},
},
+ "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
+ { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
+ "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
+ "state": "A String", # Required The application state.
+ "name": "A String", # Required The application name.
+ "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
+ },
+ ],
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # Optional Mapping of query variable names to values (equivalent to the Pig command: name=[value]).
"a_key": "A String",
},
- "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"jarFileUris": [ # Optional HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
+ "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
+ "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
+ "a_key": "A String",
+ },
+ },
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # Required The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob:
# "hiveJob": {
@@ -1544,54 +1591,11 @@
"A String",
],
},
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
+ "continueOnFailure": True or False, # Optional Whether to continue executing queries if a query fails. The default value is false. Setting to true can be useful when executing independent parallel queries.
"properties": { # Optional A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
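+ # Example (illustrative): a queryList value; several statements may share
+ # one string, semicolon-separated, per the queries description above.
+ #
+ #   query_list = {
+ #       'queries': [
+ #           'query1',
+ #           'query2',
+ #           'query3;query4',
+ #       ],
+ #   }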
- "yarnApplications": [ # Output-only The collection of YARN applications spun up by this job.
- { # A YARN application created by a job. Application information is a subset of <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- "progress": 3.14, # Required The numerical progress of the application, from 1 to 100.
- "state": "A String", # Required The application state.
- "name": "A String", # Required The application name.
- "trackingUrl": "A String", # Optional The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
- },
- ],
- "reference": { # Encapsulates the full scoping used to reference a job. # Optional The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a <code>job_id</code>.
- "projectId": "A String", # Required The ID of the Google Cloud Platform project that the job belongs to.
- "jobId": "A String", # Required The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 100 characters.
- },
- "hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
- "jarFileUris": [ # Optional Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
- "A String",
- ],
- "args": [ # Optional The arguments to pass to the driver. Do not include arguments, such as -libjars or -Dfoo=bar, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
- "A String",
- ],
- "loggingConfiguration": { # The runtime logging configuration of the job. # Optional The runtime log configuration for job execution.
- "driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- "a_key": "A String",
- },
- },
- "fileUris": [ # Optional HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
- "A String",
- ],
- "mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in jar_file_uris.
- "archiveUris": [ # Optional HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
- "A String",
- ],
- "mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- "properties": { # Optional A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
- "a_key": "A String",
- },
- },
- "labels": { # Optional The labels to associate with this job.Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}Label values must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}\p{N}_-{0,63}No more than 64 labels can be associated with a given job.
- "a_key": "A String",
- },
"driverOutputResourceUri": "A String", # Output-only A URI pointing to the location of the stdout of the job's driver program.
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # Required The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
@@ -1649,10 +1653,6 @@
"scheduling": { # Job scheduling options.Beta Feature: These options are available for testing purposes only. They may be changed before final release. # Optional Job scheduling configuration.
"maxFailuresPerHour": 42, # Optional Maximum number of times per hour a driver may be restarted as a result of driver terminating with non-zero code before job is reported failed.A job may be reported as thrashing if driver exits with non-zero code 4 times within 10 minute window.Maximum value is 10.
},
- "placement": { # Cloud Dataproc job configuration. # Required Job information, including how, when, and where to run the job.
- "clusterName": "A String", # Required The name of the cluster where the job will be submitted.
- "clusterUuid": "A String", # Output-only A cluster UUID generated by the Dataproc service when the job is submitted.
- },
"statusHistory": [ # Output-only The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # Required A state message specifying the overall job state.