docs: docs update (#911)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/google-api-python-client/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)
Fixes #<issue_number_goes_here> 🦕
diff --git a/docs/dyn/bigquery_v2.models.html b/docs/dyn/bigquery_v2.models.html
index 2f813fe..91fc1e6 100644
--- a/docs/dyn/bigquery_v2.models.html
+++ b/docs/dyn/bigquery_v2.models.html
@@ -114,375 +114,375 @@
An object of the form:
{
- "labels": { # The labels associated with this model. You can use these to organize
- # and group your models. Label keys and values can be no longer
- # than 63 characters, can only contain lowercase letters, numeric
- # characters, underscores and dashes. International characters are allowed.
- # Label values are optional. Label keys must start with a letter and each
- # label in the list must have a different key.
- "a_key": "A String",
+ "location": "A String", # Output only. The geographic location where the model resides. This value
+ # is inherited from the dataset.
+ "friendlyName": "A String", # Optional. A descriptive name for this model.
+ "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.
+ "labels": { # The labels associated with this model. You can use these to organize
+ # and group your models. Label keys and values can be no longer
+ # than 63 characters, can only contain lowercase letters, numeric
+ # characters, underscores and dashes. International characters are allowed.
+ # Label values are optional. Label keys must start with a letter and each
+ # label in the list must have a different key.
+ "a_key": "A String",
+ },
+ "labelColumns": [ # Output only. Label columns that were used to train this model.
+ # The output of the model will have a "predicted_" prefix to these columns.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
},
- "description": "A String", # Optional. A user-friendly description of this model.
- "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
- { # Information about a single training query run for the model.
- "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
- # end of training.
- # data or just the eval data based on whether eval data was used during
- # training. These are not present for imported models.
- "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
- "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
- "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
- "clusters": [ # [Beta] Information for all clusters.
- { # Message containing the information about one cluster.
- "count": "A String", # Count of training data rows that were assigned to this cluster.
- "featureValues": [ # Values of highly variant features for this cluster.
- { # Representative value of a single feature within the cluster.
- "featureColumn": "A String", # The feature column name.
- "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
- # feature.
- "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
- "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
- # more than ten categories, we return top ten (by count) and return
- # one more CategoryCount with category "_OTHER_" and count as
- # aggregate counts of remaining categories.
- { # Represents the count of a single category within the cluster.
- "category": "A String", # The name of category.
- "count": "A String", # The count of training samples matching the category within the
- # cluster.
- },
- ],
- },
- },
+ ],
+ "modelType": "A String", # Output only. Type of the model resource.
+ "featureColumns": [ # Output only. Input feature columns that were used to train this model.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
+ # If not present, the model will persist indefinitely. Expired models
+ # will be deleted and their storage reclaimed. The defaultTableExpirationMs
+ # property of the encapsulating dataset can be used to set a default
+ # expirationTime on newly created models.
+ "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
+ { # Information about a single training query run for the model.
+ "startTime": "A String", # The start time of this training run.
+ "results": [ # Output of each iteration run, results.size() <= max_iterations.
+ { # Information about a single iteration of the training run.
+ "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
+ "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
+ "index": 42, # Index of the iteration, 0 based.
+ "learnRate": 3.14, # Learn rate used for this iteration.
+ "durationMs": "A String", # Time taken to run the iteration in milliseconds.
+ "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
+ # refactoring if we want to use model-specific iteration results.
+ "arimaModelInfo": [ # This message is repeated because there are multiple arima models
+ # fitted in auto-arima. For non-auto-arima model, its size is one.
+ { # Arima model information.
+ "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
+ "aic": 3.14, # AIC.
+ "logLikelihood": 3.14, # Log-likelihood.
+ "variance": 3.14, # Variance.
+ },
+ "timeSeriesId": "A String", # The id to indicate different time series.
+ "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
+ "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
+ 3.14,
+ ],
+ "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
+ 3.14,
+ ],
+ "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
+ },
+ "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
+ # when d is not 1.
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
+ # for one time series.
+ "A String",
],
- "centroidId": "A String", # Centroid id.
+ "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
+ "q": "A String", # Order of the moving-average part.
+ "d": "A String", # Order of the differencing part.
+ "p": "A String", # Order of the autoregressive part.
+ },
},
],
- },
- "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
- # factorization models.
- # factorization models.
- "meanSquaredLogError": 3.14, # Mean squared log error.
- "meanAbsoluteError": 3.14, # Mean absolute error.
- "meanSquaredError": 3.14, # Mean squared error.
- "medianAbsoluteError": 3.14, # Median absolute error.
- "rSquared": 3.14, # R^2 score.
- },
- "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
- # models.
- # feedback_type=implicit.
- "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
- # recommendation models except instead of computing the rating directly,
- # the output from evaluate is computed against a preference which is 1 or 0
- # depending on if the rating exists or not.
- "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
- # then averages all the precisions across all the users.
- "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
- # from the predicted confidence and dividing it by the original rank.
- "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
- # predicted confidence by comparing it to an ideal rank measured by the
- # original ratings.
- },
- "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
- "negativeLabel": "A String", # Label representing the negative class.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "positiveLabel": "A String", # Label representing the positive class.
- "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
- { # Confusion matrix for binary classification models.
- "truePositives": "A String", # Number of true samples predicted as true.
- "recall": 3.14, # The fraction of actual positive labels that were given a positive
- # prediction.
- "precision": 3.14, # The fraction of actual positive predictions that had positive actual
- # labels.
- "falseNegatives": "A String", # Number of false samples predicted as false.
- "trueNegatives": "A String", # Number of true samples predicted as false.
- "falsePositives": "A String", # Number of false samples predicted as true.
- "f1Score": 3.14, # The equally weighted average of recall and precision.
- "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
- "accuracy": 3.14, # The fraction of predictions given the correct label.
- },
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
+ # one time series.
+ "A String",
],
},
- "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
+ "clusterInfos": [ # Information about top clusters for clustering models.
+ { # Information about a single cluster for clustering model.
+ "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
+ "centroidId": "A String", # Centroid id.
+ "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
+ # to each point assigned to the cluster.
},
- "confusionMatrixList": [ # Confusion matrix at different thresholds.
- { # Confusion matrix for multi-class classification models.
- "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
- # confusion matrix.
- "rows": [ # One row per actual label.
- { # A single row in the confusion matrix.
- "actualLabel": "A String", # The original label of this row.
- "entries": [ # Info describing predicted label distribution.
- { # A single entry in the confusion matrix.
- "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
- # also add an entry indicating the number of items under the
- # confidence threshold.
- "itemCount": "A String", # Number of items being predicted as this label.
+ ],
+ },
+ ],
+ "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
+ # end of training.
+ # data or just the eval data based on whether eval data was used during
+ # training. These are not present for imported models.
+ "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "negativeLabel": "A String", # Label representing the negative class.
+ "positiveLabel": "A String", # Label representing the positive class.
+ "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
+ { # Confusion matrix for binary classification models.
+ "falseNegatives": "A String", # Number of false samples predicted as false.
+ "falsePositives": "A String", # Number of false samples predicted as true.
+ "trueNegatives": "A String", # Number of true samples predicted as false.
+ "f1Score": 3.14, # The equally weighted average of recall and precision.
+ "precision": 3.14, # The fraction of actual positive predictions that had positive actual
+ # labels.
+ "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
+ "accuracy": 3.14, # The fraction of predictions given the correct label.
+ "truePositives": "A String", # Number of true samples predicted as true.
+ "recall": 3.14, # The fraction of actual positive labels that were given a positive
+ # prediction.
+ },
+ ],
+ },
+ "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
+ # factorization models.
+ # factorization models.
+ "meanSquaredError": 3.14, # Mean squared error.
+ "rSquared": 3.14, # R^2 score.
+ "medianAbsoluteError": 3.14, # Median absolute error.
+ "meanSquaredLogError": 3.14, # Mean squared log error.
+ "meanAbsoluteError": 3.14, # Mean absolute error.
+ },
+ "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
+ # models.
+ # feedback_type=implicit.
+ "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
+ # then averages all the precisions across all the users.
+ "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
+ # predicted confidence by comparing it to an ideal rank measured by the
+ # original ratings.
+ "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
+ # from the predicted confidence and dividing it by the original rank.
+ "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
+ # recommendation models except instead of computing the rating directly,
+ # the output from evaluate is computed against a preference which is 1 or 0
+ # depending on if the rating exists or not.
+ },
+ "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "confusionMatrixList": [ # Confusion matrix at different thresholds.
+ { # Confusion matrix for multi-class classification models.
+ "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
+ # confusion matrix.
+ "rows": [ # One row per actual label.
+ { # A single row in the confusion matrix.
+ "entries": [ # Info describing predicted label distribution.
+ { # A single entry in the confusion matrix.
+ "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
+ # also add an entry indicating the number of items under the
+ # confidence threshold.
+ "itemCount": "A String", # Number of items being predicted as this label.
+ },
+ ],
+ "actualLabel": "A String", # The original label of this row.
+ },
+ ],
+ },
+ ],
+ },
+ "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
+ "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
+ "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
+ "clusters": [ # [Beta] Information for all clusters.
+ { # Message containing the information about one cluster.
+ "count": "A String", # Count of training data rows that were assigned to this cluster.
+ "featureValues": [ # Values of highly variant features for this cluster.
+ { # Representative value of a single feature within the cluster.
+ "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
+ # feature.
+ "featureColumn": "A String", # The feature column name.
+ "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
+ "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
+ # more than ten categories, we return top ten (by count) and return
+ # one more CategoryCount with category "_OTHER_" and count as
+ # aggregate counts of remaining categories.
+ { # Represents the count of a single category within the cluster.
+ "category": "A String", # The name of category.
+ "count": "A String", # The count of training samples matching the category within the
+ # cluster.
},
],
},
- ],
- },
- ],
- },
- },
- "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
- # actually split.
- # data tables that were used to train the model.
- "trainingTable": { # Table reference of the training data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- "evaluationTable": { # Table reference of the evaluation data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- },
- "results": [ # Output of each iteration run, results.size() <= max_iterations.
- { # Information about a single iteration of the training run.
- "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
- # refactoring if we want to use model-specific iteration results.
- "arimaModelInfo": [ # This message is repeated because there are multiple arima models
- # fitted in auto-arima. For non-auto-arima model, its size is one.
- { # Arima model information.
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
- # for one time series.
- "A String",
- ],
- "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
- # when d is not 1.
- "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
- "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
- 3.14,
- ],
- "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
- 3.14,
- ],
- "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
- },
- "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
- "q": "A String", # Order of the moving-average part.
- "p": "A String", # Order of the autoregressive part.
- "d": "A String", # Order of the differencing part.
- },
- "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
- "variance": 3.14, # Variance.
- "logLikelihood": 3.14, # Log-likelihood.
- "aic": 3.14, # AIC.
- },
- "timeSeriesId": "A String", # The id to indicate different time series.
},
],
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
- # one time series.
- "A String",
- ],
+ "centroidId": "A String", # Centroid id.
},
- "index": 42, # Index of the iteration, 0 based.
- "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
- "durationMs": "A String", # Time taken to run the iteration in milliseconds.
- "learnRate": 3.14, # Learn rate used for this iteration.
- "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
- "clusterInfos": [ # Information about top clusters for clustering models.
- { # Information about a single cluster for clustering model.
- "centroidId": "A String", # Centroid id.
- "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
- "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
- # to each point assigned to the cluster.
- },
- ],
- },
+ ],
+ },
+ },
+ "trainingOptions": { # Options that were used for this training run, includes
+ # user specified and default options that were used.
+ "dropout": 3.14, # Dropout probability for dnn models.
+ "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
+ "labelClassWeights": { # Weights associated with each label class, for rebalancing the
+ # training data. Only applicable for classification models.
+ "a_key": 3.14,
+ },
+ "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
+ # overfitting for boosted tree models.
+ "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
+ # any more (compared to min_relative_progress). Used only for iterative
+ # training algorithms.
+ "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
+ # of data will be used as training data. The format should be double.
+ # Accurate to two decimal places.
+ # Default value is 0.2.
+ "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
+ # strategy.
+ "itemColumn": "A String", # Item column specified for matrix factorization models.
+ "inputLabelColumns": [ # Name of input label columns in training data.
+ "A String",
],
- "startTime": "A String", # The start time of this training run.
- "trainingOptions": { # Options that were used for this training run, includes
- # user specified and default options that were used.
- "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
- "itemColumn": "A String", # Item column specified for matrix factorization models.
- "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
- # factorization.
- "numFactors": "A String", # Num factors specified for matrix factorization models.
- "inputLabelColumns": [ # Name of input label columns in training data.
- "A String",
- ],
- "batchSize": "A String", # Batch size for dnn models.
- "distanceType": "A String", # Distance type for clustering models.
- "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
- # when kmeans_initialization_method is CUSTOM.
- "l2Regularization": 3.14, # L2 regularization coefficient.
- "dropout": 3.14, # Dropout probability for dnn models.
- "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
- # less than 'min_relative_progress'. Used only for iterative training
- # algorithms.
- "l1Regularization": 3.14, # L1 regularization coefficient.
- "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
- # training algorithms.
- "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
- # any more (compared to min_relative_progress). Used only for iterative
- # training algorithms.
- "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
- # strategy.
- "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
- # feature.
- # 1. When data_split_method is CUSTOM, the corresponding column should
- # be boolean. The rows with true value tag are eval data, and the false
- # are training data.
- # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
- # rows (from smallest to largest) in the corresponding column are used
- # as training data, and the rest are eval data. It respects the order
- # in Orderable data types:
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- "numClusters": "A String", # Number of clusters for clustering models.
- "warmStart": True or False, # Whether to train a model from the last checkpoint.
- "hiddenUnits": [ # Hidden units for dnn models.
- "A String",
- ],
- "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
- "userColumn": "A String", # User column specified for matrix factorization models.
- "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
- "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
- "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
- # of data will be used as training data. The format should be double.
- # Accurate to two decimal places.
- # Default value is 0.2.
- "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
- "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
- # overfitting for boosted tree models.
- "labelClassWeights": { # Weights associated with each label class, for rebalancing the
- # training data. Only applicable for classification models.
- "a_key": 3.14,
- },
- "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
- "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
- # applicable for imported models.
- "walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is
- # specified.
- "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
- "lossType": "A String", # Type of loss function used during training run.
+ "warmStart": True or False, # Whether to train a model from the last checkpoint.
+ "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
+ "numFactors": "A String", # Num factors specified for matrix factorization models.
+ "lossType": "A String", # Type of loss function used during training run.
+ "hiddenUnits": [ # Hidden units for dnn models.
+ "A String",
+ ],
+ "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
+ "l1Regularization": 3.14, # L1 regularization coefficient.
+ "distanceType": "A String", # Distance type for clustering models.
+ "walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is
+ # specified.
+ "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
+ # factorization.
+ "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
+ "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
+ # feature.
+ # 1. When data_split_method is CUSTOM, the corresponding column should
+ # be boolean. The rows with true value tag are eval data, and the false
+ # are training data.
+ # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
+ # rows (from smallest to largest) in the corresponding column are used
+ # as training data, and the rest are eval data. It respects the order
+ # in Orderable data types:
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
+ "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
+ # training algorithms.
+ "userColumn": "A String", # User column specified for matrix factorization models.
+ "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
+ "l2Regularization": 3.14, # L2 regularization coefficient.
+ "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
+ # applicable for imported models.
+ "batchSize": "A String", # Batch size for dnn models.
+ "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
+ # less than 'min_relative_progress'. Used only for iterative training
+ # algorithms.
+ "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
+ # when kmeans_initialization_method is CUSTOM.
+ "numClusters": "A String", # Number of clusters for clustering models.
+ "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
+ "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
+ },
+ "dataSplitResult": { # Data split result of the training run. Only set when the input data is actually split.
+ # Data split result. This contains references to the training and evaluation
+ # data tables that were used to train the model.
+ "trainingTable": { # Table reference of the training data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
+ },
+ "evaluationTable": { # Table reference of the evaluation data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
},
},
- ],
- "featureColumns": [ # Output only. Input feature columns that were used to train this model.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "labelColumns": [ # Output only. Label columns that were used to train this model.
- # The output of the model will have a "predicted_" prefix to these columns.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
- "modelType": "A String", # Output only. Type of the model resource.
- "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
- # encryption configuration of the model data while stored in BigQuery
- # storage. This field can be used with PatchModel to update encryption key
- # for an already encrypted model.
- "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
},
- "modelReference": { # Required. Unique identifier for this model.
- "projectId": "A String", # [Required] The ID of the project containing this model.
- "datasetId": "A String", # [Required] The ID of the dataset containing this model.
- "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- },
- "etag": "A String", # Output only. A hash of this resource.
- "location": "A String", # Output only. The geographic location where the model resides. This value
- # is inherited from the dataset.
- "friendlyName": "A String", # Optional. A descriptive name for this model.
- "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
- # If not present, the model will persist indefinitely. Expired models
- # will be deleted and their storage reclaimed. The defaultTableExpirationMs
- # property of the encapsulating dataset can be used to set a default
- # expirationTime on newly created models.
- "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
- }</pre>
+ ],
+ "modelReference": { # Required. Unique identifier for this model.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this model.
+ "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this model.
+ },
+ "description": "A String", # Optional. A user-friendly description of this model.
+ "etag": "A String", # Output only. A hash of this resource.
+ "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
+ "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
+ # encryption configuration of the model data while stored in BigQuery
+ # storage. This field can be used with PatchModel to update encryption key
+ # for an already encrypted model.
+ "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
+ },
+ }</pre>
</div>
<div class="method">
@@ -502,380 +502,380 @@
An object of the form:
{
- "nextPageToken": "A String", # A token to request the next page of results.
- "models": [ # Models in the requested dataset. Only the following fields are populated:
+ "nextPageToken": "A String", # A token to request the next page of results.
+ "models": [ # Models in the requested dataset. Only the following fields are populated:
# model_reference, model_type, creation_time, last_modified_time and
# labels.
{
- "labels": { # The labels associated with this model. You can use these to organize
- # and group your models. Label keys and values can be no longer
- # than 63 characters, can only contain lowercase letters, numeric
- # characters, underscores and dashes. International characters are allowed.
- # Label values are optional. Label keys must start with a letter and each
- # label in the list must have a different key.
- "a_key": "A String",
+ "location": "A String", # Output only. The geographic location where the model resides. This value
+ # is inherited from the dataset.
+ "friendlyName": "A String", # Optional. A descriptive name for this model.
+ "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
+ "labels": { # The labels associated with this model. You can use these to organize
+ # and group your models. Label keys and values can be no longer
+ # than 63 characters, can only contain lowercase letters, numeric
+ # characters, underscores and dashes. International characters are allowed.
+ # Label values are optional. Label keys must start with a letter and each
+ # label in the list must have a different key.
+ "a_key": "A String",
+ },
+ "labelColumns": [ # Output only. Label columns that were used to train this model.
+ # The output of the model will have a "predicted_" prefix to these columns.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
},
- "description": "A String", # Optional. A user-friendly description of this model.
- "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
- { # Information about a single training query run for the model.
- "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
- # end of training.
- # data or just the eval data based on whether eval data was used during
- # training. These are not present for imported models.
- "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
- "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
- "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
- "clusters": [ # [Beta] Information for all clusters.
- { # Message containing the information about one cluster.
- "count": "A String", # Count of training data rows that were assigned to this cluster.
- "featureValues": [ # Values of highly variant features for this cluster.
- { # Representative value of a single feature within the cluster.
- "featureColumn": "A String", # The feature column name.
- "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
- # feature.
- "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
- "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
- # more than ten categories, we return top ten (by count) and return
- # one more CategoryCount with category "_OTHER_" and count as
- # aggregate counts of remaining categories.
- { # Represents the count of a single category within the cluster.
- "category": "A String", # The name of category.
- "count": "A String", # The count of training samples matching the category within the
- # cluster.
- },
- ],
- },
- },
+ ],
+ "modelType": "A String", # Output only. Type of the model resource.
+ "featureColumns": [ # Output only. Input feature columns that were used to train this model.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
+ # If not present, the model will persist indefinitely. Expired models
+ # will be deleted and their storage reclaimed. The defaultTableExpirationMs
+ # property of the encapsulating dataset can be used to set a default
+ # expirationTime on newly created models.
+ "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
+ { # Information about a single training query run for the model.
+ "startTime": "A String", # The start time of this training run.
+ "results": [ # Output of each iteration run, results.size() <= max_iterations.
+ { # Information about a single iteration of the training run.
+ "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
+ "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
+ "index": 42, # Index of the iteration, 0 based.
+ "learnRate": 3.14, # Learn rate used for this iteration.
+ "durationMs": "A String", # Time taken to run the iteration in milliseconds.
+ "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
+ # refactoring if we want to use model-specific iteration results.
+ "arimaModelInfo": [ # This message is repeated because there are multiple arima models
+ # fitted in auto-arima. For non-auto-arima model, its size is one.
+ { # Arima model information.
+ "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
+ "aic": 3.14, # AIC.
+ "logLikelihood": 3.14, # Log-likelihood.
+ "variance": 3.14, # Variance.
+ },
+ "timeSeriesId": "A String", # The id to indicate different time series.
+ "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
+ "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
+ 3.14,
+ ],
+ "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
+ 3.14,
+ ],
+ "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
+ },
+ "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
+ # when d is not 1.
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
+ # for one time series.
+ "A String",
],
- "centroidId": "A String", # Centroid id.
+ "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
+ "q": "A String", # Order of the moving-average part.
+ "d": "A String", # Order of the differencing part.
+ "p": "A String", # Order of the autoregressive part.
+ },
},
],
- },
- "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
- # factorization models.
- # factorization models.
- "meanSquaredLogError": 3.14, # Mean squared log error.
- "meanAbsoluteError": 3.14, # Mean absolute error.
- "meanSquaredError": 3.14, # Mean squared error.
- "medianAbsoluteError": 3.14, # Median absolute error.
- "rSquared": 3.14, # R^2 score.
- },
- "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
- # models.
- # feedback_type=implicit.
- "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
- # recommendation models except instead of computing the rating directly,
- # the output from evaluate is computed against a preference which is 1 or 0
- # depending on if the rating exists or not.
- "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
- # then averages all the precisions across all the users.
- "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
- # from the predicted confidence and dividing it by the original rank.
- "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
- # predicted confidence by comparing it to an ideal rank measured by the
- # original ratings.
- },
- "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
- "negativeLabel": "A String", # Label representing the negative class.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "positiveLabel": "A String", # Label representing the positive class.
- "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
- { # Confusion matrix for binary classification models.
- "truePositives": "A String", # Number of true samples predicted as true.
- "recall": 3.14, # The fraction of actual positive labels that were given a positive
- # prediction.
- "precision": 3.14, # The fraction of actual positive predictions that had positive actual
- # labels.
- "falseNegatives": "A String", # Number of false samples predicted as false.
- "trueNegatives": "A String", # Number of true samples predicted as false.
- "falsePositives": "A String", # Number of false samples predicted as true.
- "f1Score": 3.14, # The equally weighted average of recall and precision.
- "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
- "accuracy": 3.14, # The fraction of predictions given the correct label.
- },
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
+ # one time series.
+ "A String",
],
},
- "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
+ "clusterInfos": [ # Information about top clusters for clustering models.
+ { # Information about a single cluster for clustering model.
+ "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
+ "centroidId": "A String", # Centroid id.
+ "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
+ # to each point assigned to the cluster.
},
- "confusionMatrixList": [ # Confusion matrix at different thresholds.
- { # Confusion matrix for multi-class classification models.
- "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
- # confusion matrix.
- "rows": [ # One row per actual label.
- { # A single row in the confusion matrix.
- "actualLabel": "A String", # The original label of this row.
- "entries": [ # Info describing predicted label distribution.
- { # A single entry in the confusion matrix.
- "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
- # also add an entry indicating the number of items under the
- # confidence threshold.
- "itemCount": "A String", # Number of items being predicted as this label.
+ ],
+ },
+ ],
+ "evaluationMetrics": { # The evaluation metrics over training/eval data that were computed at the end of training.
+ # Evaluation metrics of a model. These are either computed on all training
+ # data or just the eval data based on whether eval data was used during
+ # training. These are not present for imported models.
+ "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate classification metrics. Aggregate metrics for classification/classifier models. For multi-class
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "negativeLabel": "A String", # Label representing the negative class.
+ "positiveLabel": "A String", # Label representing the positive class.
+ "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
+ { # Confusion matrix for binary classification models.
+ "falseNegatives": "A String", # Number of false samples predicted as false.
+ "falsePositives": "A String", # Number of false samples predicted as true.
+ "trueNegatives": "A String", # Number of true samples predicted as false.
+ "f1Score": 3.14, # The equally weighted average of recall and precision.
+ "precision": 3.14, # The fraction of actual positive predictions that had positive actual
+ # labels.
+ "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
+ "accuracy": 3.14, # The fraction of predictions given the correct label.
+ "truePositives": "A String", # Number of true samples predicted as true.
+ "recall": 3.14, # The fraction of actual positive labels that were given a positive
+ # prediction.
+ },
+ ],
+ },
+ "regressionMetrics": { # Populated for regression models and explicit feedback type matrix
+ # factorization models. Evaluation metrics for regression and explicit feedback type matrix
+ # factorization models.
+ "meanSquaredError": 3.14, # Mean squared error.
+ "rSquared": 3.14, # R^2 score.
+ "medianAbsoluteError": 3.14, # Median absolute error.
+ "meanSquaredLogError": 3.14, # Mean squared log error.
+ "meanAbsoluteError": 3.14, # Mean absolute error.
+ },
+ "rankingMetrics": { # [Alpha] Populated for implicit feedback type matrix factorization
+ # models. Evaluation metrics used by weighted-ALS models specified by
+ # feedback_type=implicit.
+ "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
+ # then averages all the precisions across all the users.
+ "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
+ # predicted confidence by comparing it to an ideal rank measured by the
+ # original ratings.
+ "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
+ # from the predicted confidence and dividing it by the original rank.
+ "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
+ # recommendation models except instead of computing the rating directly,
+ # the output from evaluate is computed against a preference which is 1 or 0
+ # depending on if the rating exists or not.
+ },
+ "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "confusionMatrixList": [ # Confusion matrix at different thresholds.
+ { # Confusion matrix for multi-class classification models.
+ "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
+ # confusion matrix.
+ "rows": [ # One row per actual label.
+ { # A single row in the confusion matrix.
+ "entries": [ # Info describing predicted label distribution.
+ { # A single entry in the confusion matrix.
+ "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
+ # also add an entry indicating the number of items under the
+ # confidence threshold.
+ "itemCount": "A String", # Number of items being predicted as this label.
+ },
+ ],
+ "actualLabel": "A String", # The original label of this row.
+ },
+ ],
+ },
+ ],
+ },
+ "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
+ "meanSquaredDistance": 3.14, # Mean of squared distances between each sample and its cluster centroid.
+ "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
+ "clusters": [ # [Beta] Information for all clusters.
+ { # Message containing the information about one cluster.
+ "count": "A String", # Count of training data rows that were assigned to this cluster.
+ "featureValues": [ # Values of highly variant features for this cluster.
+ { # Representative value of a single feature within the cluster.
+ "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
+ # feature.
+ "featureColumn": "A String", # The feature column name.
+ "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
+ "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
+ # more than ten categories, we return top ten (by count) and return
+ # one more CategoryCount with category "_OTHER_" and count as
+ # aggregate counts of remaining categories.
+ { # Represents the count of a single category within the cluster.
+ "category": "A String", # The name of category.
+ "count": "A String", # The count of training samples matching the category within the
+ # cluster.
},
],
},
- ],
- },
- ],
- },
- },
- "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
- # actually split.
- # data tables that were used to train the model.
- "trainingTable": { # Table reference of the training data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- "evaluationTable": { # Table reference of the evaluation data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- },
- "results": [ # Output of each iteration run, results.size() <= max_iterations.
- { # Information about a single iteration of the training run.
- "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
- # refactoring if we want to use model-specific iteration results.
- "arimaModelInfo": [ # This message is repeated because there are multiple arima models
- # fitted in auto-arima. For non-auto-arima model, its size is one.
- { # Arima model information.
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
- # for one time series.
- "A String",
- ],
- "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
- # when d is not 1.
- "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
- "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
- 3.14,
- ],
- "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
- 3.14,
- ],
- "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
- },
- "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
- "q": "A String", # Order of the moving-average part.
- "p": "A String", # Order of the autoregressive part.
- "d": "A String", # Order of the differencing part.
- },
- "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
- "variance": 3.14, # Variance.
- "logLikelihood": 3.14, # Log-likelihood.
- "aic": 3.14, # AIC.
- },
- "timeSeriesId": "A String", # The id to indicate different time series.
},
],
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
- # one time series.
- "A String",
- ],
+ "centroidId": "A String", # Centroid id.
},
- "index": 42, # Index of the iteration, 0 based.
- "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
- "durationMs": "A String", # Time taken to run the iteration in milliseconds.
- "learnRate": 3.14, # Learn rate used for this iteration.
- "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
- "clusterInfos": [ # Information about top clusters for clustering models.
- { # Information about a single cluster for clustering model.
- "centroidId": "A String", # Centroid id.
- "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
- "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
- # to each point assigned to the cluster.
- },
- ],
- },
+ ],
+ },
+ },
+ "trainingOptions": { # Options that were used for this training run, includes
+ # user specified and default options that were used.
+ "dropout": 3.14, # Dropout probability for dnn models.
+ "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
+ "labelClassWeights": { # Weights associated with each label class, for rebalancing the
+ # training data. Only applicable for classification models.
+ "a_key": 3.14,
+ },
+ "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
+ # overfitting for boosted tree models.
+ "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
+ # any more (compared to min_relative_progress). Used only for iterative
+ # training algorithms.
+ "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
+ # of data will be used as training data. The format should be double.
+ # Accurate to two decimal places.
+ # Default value is 0.2.
+ "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
+ # strategy.
+ "itemColumn": "A String", # Item column specified for matrix factorization models.
+ "inputLabelColumns": [ # Name of input label columns in training data.
+ "A String",
],
- "startTime": "A String", # The start time of this training run.
- "trainingOptions": { # Options that were used for this training run, includes
- # user specified and default options that were used.
- "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
- "itemColumn": "A String", # Item column specified for matrix factorization models.
- "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
- # factorization.
- "numFactors": "A String", # Num factors specified for matrix factorization models.
- "inputLabelColumns": [ # Name of input label columns in training data.
- "A String",
- ],
- "batchSize": "A String", # Batch size for dnn models.
- "distanceType": "A String", # Distance type for clustering models.
- "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
- # when kmeans_initialization_method is CUSTOM.
- "l2Regularization": 3.14, # L2 regularization coefficient.
- "dropout": 3.14, # Dropout probability for dnn models.
- "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
- # less than 'min_relative_progress'. Used only for iterative training
- # algorithms.
- "l1Regularization": 3.14, # L1 regularization coefficient.
- "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
- # training algorithms.
- "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
- # any more (compared to min_relative_progress). Used only for iterative
- # training algorithms.
- "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
- # strategy.
- "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
- # feature.
- # 1. When data_split_method is CUSTOM, the corresponding column should
- # be boolean. The rows with true value tag are eval data, and the false
- # are training data.
- # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
- # rows (from smallest to largest) in the corresponding column are used
- # as training data, and the rest are eval data. It respects the order
- # in Orderable data types:
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- "numClusters": "A String", # Number of clusters for clustering models.
- "warmStart": True or False, # Whether to train a model from the last checkpoint.
- "hiddenUnits": [ # Hidden units for dnn models.
- "A String",
- ],
- "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
- "userColumn": "A String", # User column specified for matrix factorization models.
- "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
- "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
- "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
- # of data will be used as training data. The format should be double.
- # Accurate to two decimal places.
- # Default value is 0.2.
- "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
- "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
- # overfitting for boosted tree models.
- "labelClassWeights": { # Weights associated with each label class, for rebalancing the
- # training data. Only applicable for classification models.
- "a_key": 3.14,
- },
- "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
- "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
- # applicable for imported models.
- "walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is
- # specified.
- "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
- "lossType": "A String", # Type of loss function used during training run.
+ "warmStart": True or False, # Whether to train a model from the last checkpoint.
+ "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
+ "numFactors": "A String", # Num factors specified for matrix factorization models.
+ "lossType": "A String", # Type of loss function used during training run.
+ "hiddenUnits": [ # Hidden units for dnn models.
+ "A String",
+ ],
+ "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
+ "l1Regularization": 3.14, # L1 regularization coefficient.
+ "distanceType": "A String", # Distance type for clustering models.
+ "walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is
+ # specified.
+ "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
+ # factorization.
+ "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
+ "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
+ # feature.
+ # 1. When data_split_method is CUSTOM, the corresponding column should
+ # be boolean. The rows with true value tag are eval data, and the false
+ # are training data.
+ # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
+ # rows (from smallest to largest) in the corresponding column are used
+ # as training data, and the rest are eval data. It respects the order
+ # in Orderable data types:
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
+ "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
+ # training algorithms.
+ "userColumn": "A String", # User column specified for matrix factorization models.
+ "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
+ "l2Regularization": 3.14, # L2 regularization coefficient.
+ "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
+ # applicable for imported models.
+ "batchSize": "A String", # Batch size for dnn models.
+ "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
+ # less than 'min_relative_progress'. Used only for iterative training
+ # algorithms.
+ "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
+ # when kmeans_initialization_method is CUSTOM.
+ "numClusters": "A String", # Number of clusters for clustering models.
+ "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
+ "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
+ },
+ "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
+ # actually split.
+ # data tables that were used to train the model.
+ "trainingTable": { # Table reference of the training data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
+ },
+ "evaluationTable": { # Table reference of the evaluation data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
},
},
- ],
- "featureColumns": [ # Output only. Input feature columns that were used to train this model.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "labelColumns": [ # Output only. Label columns that were used to train this model.
- # The output of the model will have a "predicted_" prefix to these columns.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
- "modelType": "A String", # Output only. Type of the model resource.
- "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
- # encryption configuration of the model data while stored in BigQuery
- # storage. This field can be used with PatchModel to update encryption key
- # for an already encrypted model.
- "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
},
- "modelReference": { # Required. Unique identifier for this model.
- "projectId": "A String", # [Required] The ID of the project containing this model.
- "datasetId": "A String", # [Required] The ID of the dataset containing this model.
- "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- },
- "etag": "A String", # Output only. A hash of this resource.
- "location": "A String", # Output only. The geographic location where the model resides. This value
- # is inherited from the dataset.
- "friendlyName": "A String", # Optional. A descriptive name for this model.
- "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
- # If not present, the model will persist indefinitely. Expired models
- # will be deleted and their storage reclaimed. The defaultTableExpirationMs
- # property of the encapsulating dataset can be used to set a default
- # expirationTime on newly created models.
- "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
+ ],
+ "modelReference": { # Required. Unique identifier for this model.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this model.
+ "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this model.
},
+ "description": "A String", # Optional. A user-friendly description of this model.
+ "etag": "A String", # Output only. A hash of this resource.
+ "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
+ "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
+ # encryption configuration of the model data while stored in BigQuery
+ # storage. This field can be used with PatchModel to update encryption key
+ # for an already encrypted model.
+ "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
+ },
+ },
],
}</pre>
</div>
@@ -889,7 +889,7 @@
previous_response: The response from the request for the previous page. (required)
Returns:
- A request object that you can call 'execute()' on to request the next
+ A request object that you can call 'execute()' on to request the next
page. Returns None if there are no more items in the collection.
</pre>
</div>
@@ -906,259 +906,693 @@
The object takes the form of:
{
- "labels": { # The labels associated with this model. You can use these to organize
+ "location": "A String", # Output only. The geographic location where the model resides. This value
+ # is inherited from the dataset.
+ "friendlyName": "A String", # Optional. A descriptive name for this model.
+ "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
+ "labels": { # The labels associated with this model. You can use these to organize
+ # and group your models. Label keys and values can be no longer
+ # than 63 characters, can only contain lowercase letters, numeric
+ # characters, underscores and dashes. International characters are allowed.
+ # Label values are optional. Label keys must start with a letter and each
+ # label in the list must have a different key.
+ "a_key": "A String",
+ },
+ "labelColumns": [ # Output only. Label columns that were used to train this model.
+ # The output of the model will have a "predicted_" prefix to these columns.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "modelType": "A String", # Output only. Type of the model resource.
+ "featureColumns": [ # Output only. Input feature columns that were used to train this model.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
+ # If not present, the model will persist indefinitely. Expired models
+ # will be deleted and their storage reclaimed. The defaultTableExpirationMs
+ # property of the encapsulating dataset can be used to set a default
+ # expirationTime on newly created models.
+ "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
+ { # Information about a single training query run for the model.
+ "startTime": "A String", # The start time of this training run.
+ "results": [ # Output of each iteration run, results.size() <= max_iterations.
+ { # Information about a single iteration of the training run.
+ "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
+ "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
+ "index": 42, # Index of the iteration, 0 based.
+ "learnRate": 3.14, # Learn rate used for this iteration.
+ "durationMs": "A String", # Time taken to run the iteration in milliseconds.
+ "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
+ # refactoring if we want to use model-specific iteration results.
+ "arimaModelInfo": [ # This message is repeated because there are multiple arima models
+ # fitted in auto-arima. For non-auto-arima model, its size is one.
+ { # Arima model information.
+ "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
+ "aic": 3.14, # AIC.
+ "logLikelihood": 3.14, # Log-likelihood.
+ "variance": 3.14, # Variance.
+ },
+ "timeSeriesId": "A String", # The id to indicate different time series.
+ "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
+ "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
+ 3.14,
+ ],
+ "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
+ 3.14,
+ ],
+ "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
+ },
+ "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
+ # when d is not 1.
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
+ # for one time series.
+ "A String",
+ ],
+ "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
+ "q": "A String", # Order of the moving-average part.
+ "d": "A String", # Order of the differencing part.
+ "p": "A String", # Order of the autoregressive part.
+ },
+ },
+ ],
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
+ # one time series.
+ "A String",
+ ],
+ },
+ "clusterInfos": [ # Information about top clusters for clustering models.
+ { # Information about a single cluster for clustering model.
+ "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
+ "centroidId": "A String", # Centroid id.
+ "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
+ # to each point assigned to the cluster.
+ },
+ ],
+ },
+ ],
+ "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
+ # end of training.
+ # data or just the eval data based on whether eval data was used during
+ # training. These are not present for imported models.
+ "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "negativeLabel": "A String", # Label representing the negative class.
+ "positiveLabel": "A String", # Label representing the positive class.
+ "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
+ { # Confusion matrix for binary classification models.
+ "falseNegatives": "A String", # Number of true samples predicted as false.
+ "falsePositives": "A String", # Number of false samples predicted as true.
+ "trueNegatives": "A String", # Number of false samples predicted as false.
+ "f1Score": 3.14, # The equally weighted average of recall and precision.
+ "precision": 3.14, # The fraction of actual positive predictions that had positive actual
+ # labels.
+ "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
+ "accuracy": 3.14, # The fraction of predictions given the correct label.
+ "truePositives": "A String", # Number of true samples predicted as true.
+ "recall": 3.14, # The fraction of actual positive labels that were given a positive
+ # prediction.
+ },
+ ],
+ },
+ "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
+ # factorization models.
+ # factorization models.
+ "meanSquaredError": 3.14, # Mean squared error.
+ "rSquared": 3.14, # R^2 score.
+ "medianAbsoluteError": 3.14, # Median absolute error.
+ "meanSquaredLogError": 3.14, # Mean squared log error.
+ "meanAbsoluteError": 3.14, # Mean absolute error.
+ },
+ "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
+ # models.
+ # feedback_type=implicit.
+ "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
+ # then averages all the precisions across all the users.
+ "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
+ # predicted confidence by comparing it to an ideal rank measured by the
+ # original ratings.
+ "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
+ # from the predicted confidence and dividing it by the original rank.
+ "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
+ # recommendation models except instead of computing the rating directly,
+ # the output from evaluate is computed against a preference which is 1 or 0
+ # depending on if the rating exists or not.
+ },
+ "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "confusionMatrixList": [ # Confusion matrix at different thresholds.
+ { # Confusion matrix for multi-class classification models.
+ "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
+ # confusion matrix.
+ "rows": [ # One row per actual label.
+ { # A single row in the confusion matrix.
+ "entries": [ # Info describing predicted label distribution.
+ { # A single entry in the confusion matrix.
+ "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
+ # also add an entry indicating the number of items under the
+ # confidence threshold.
+ "itemCount": "A String", # Number of items being predicted as this label.
+ },
+ ],
+ "actualLabel": "A String", # The original label of this row.
+ },
+ ],
+ },
+ ],
+ },
+ "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
+ "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
+ "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
+ "clusters": [ # [Beta] Information for all clusters.
+ { # Message containing the information about one cluster.
+ "count": "A String", # Count of training data rows that were assigned to this cluster.
+ "featureValues": [ # Values of highly variant features for this cluster.
+ { # Representative value of a single feature within the cluster.
+ "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
+ # feature.
+ "featureColumn": "A String", # The feature column name.
+ "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
+ "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
+ # more than ten categories, we return top ten (by count) and return
+ # one more CategoryCount with category "_OTHER_" and count as
+ # aggregate counts of remaining categories.
+ { # Represents the count of a single category within the cluster.
+ "category": "A String", # The name of category.
+ "count": "A String", # The count of training samples matching the category within the
+ # cluster.
+ },
+ ],
+ },
+ },
+ ],
+ "centroidId": "A String", # Centroid id.
+ },
+ ],
+ },
+ },
+ "trainingOptions": { # Options that were used for this training run, includes
+ # user specified and default options that were used.
+ "dropout": 3.14, # Dropout probability for dnn models.
+ "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
+ "labelClassWeights": { # Weights associated with each label class, for rebalancing the
+ # training data. Only applicable for classification models.
+ "a_key": 3.14,
+ },
+ "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
+ # overfitting for boosted tree models.
+ "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
+ # any more (compared to min_relative_progress). Used only for iterative
+ # training algorithms.
+ "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
+ # of data will be used as training data. The format should be double.
+ # Accurate to two decimal places.
+ # Default value is 0.2.
+ "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
+ # strategy.
+ "itemColumn": "A String", # Item column specified for matrix factorization models.
+ "inputLabelColumns": [ # Name of input label columns in training data.
+ "A String",
+ ],
+ "warmStart": True or False, # Whether to train a model from the last checkpoint.
+ "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
+ "numFactors": "A String", # Num factors specified for matrix factorization models.
+ "lossType": "A String", # Type of loss function used during training run.
+ "hiddenUnits": [ # Hidden units for dnn models.
+ "A String",
+ ],
+ "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
+ "l1Regularization": 3.14, # L1 regularization coefficient.
+ "distanceType": "A String", # Distance type for clustering models.
+ "walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is
+ # specified.
+ "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
+ # factorization.
+ "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
+ "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
+ # feature.
+ # 1. When data_split_method is CUSTOM, the corresponding column should
+ # be boolean. The rows with true value tag are eval data, and the false
+ # are training data.
+ # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
+ # rows (from smallest to largest) in the corresponding column are used
+ # as training data, and the rest are eval data. It respects the order
+ # in Orderable data types:
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
+ "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
+ # training algorithms.
+ "userColumn": "A String", # User column specified for matrix factorization models.
+ "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
+ "l2Regularization": 3.14, # L2 regularization coefficient.
+ "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
+ # applicable for imported models.
+ "batchSize": "A String", # Batch size for dnn models.
+ "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
+ # less than 'min_relative_progress'. Used only for iterative training
+ # algorithms.
+ "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
+ # when kmeans_initialization_method is CUSTOM.
+ "numClusters": "A String", # Number of clusters for clustering models.
+ "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
+ "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
+ },
+ "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
+ # actually split.
+ # data tables that were used to train the model.
+ "trainingTable": { # Table reference of the training data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
+ },
+ "evaluationTable": { # Table reference of the evaluation data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
+ },
+ },
+ },
+ ],
+ "modelReference": { # Required. Unique identifier for this model.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this model.
+ "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this model.
+ },
+ "description": "A String", # Optional. A user-friendly description of this model.
+ "etag": "A String", # Output only. A hash of this resource.
+ "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
+ "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
+ # encryption configuration of the model data while stored in BigQuery
+ # storage. This field can be used with PatchModel to update encryption key
+ # for an already encrypted model.
+ "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
+ },
+}
+
+
+Returns:
+ An object of the form:
+
+ {
+ "location": "A String", # Output only. The geographic location where the model resides. This value
+ # is inherited from the dataset.
+ "friendlyName": "A String", # Optional. A descriptive name for this model.
+ "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
+ "labels": { # The labels associated with this model. You can use these to organize
# and group your models. Label keys and values can be no longer
# than 63 characters, can only contain lowercase letters, numeric
# characters, underscores and dashes. International characters are allowed.
# Label values are optional. Label keys must start with a letter and each
# label in the list must have a different key.
- "a_key": "A String",
+ "a_key": "A String",
},
- "description": "A String", # Optional. A user-friendly description of this model.
- "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
+ "labelColumns": [ # Output only. Label columns that were used to train this model.
+ # The output of the model will have a "predicted_" prefix to these columns.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "modelType": "A String", # Output only. Type of the model resource.
+ "featureColumns": [ # Output only. Input feature columns that were used to train this model.
+ { # A field or a column.
+ "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
+ "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
+ # specified (e.g., CREATE FUNCTION statement can omit the return type;
+ # in this case the output parameter does not have this "type" field).
+ # Examples:
+ # INT64: {type_kind="INT64"}
+ # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
+ # STRUCT<x STRING, y ARRAY<DATE>>:
+ # {type_kind="STRUCT",
+ # struct_type={fields=[
+ # {name="x", type={type_kind="STRING"}},
+ # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
+ # ]}}
+ "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
+ "fields": [
+ # Object with schema name: StandardSqlField
+ ],
+ },
+ "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
+ "typeKind": "A String", # Required. The top level type of this field.
+ # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
+ },
+ },
+ ],
+ "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
+ # If not present, the model will persist indefinitely. Expired models
+ # will be deleted and their storage reclaimed. The defaultTableExpirationMs
+ # property of the encapsulating dataset can be used to set a default
+ # expirationTime on newly created models.
+ "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
{ # Information about a single training query run for the model.
- "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
+ "startTime": "A String", # The start time of this training run.
+ "results": [ # Output of each iteration run, results.size() <= max_iterations.
+ { # Information about a single iteration of the training run.
+ "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
+ "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
+ "index": 42, # Index of the iteration, 0 based.
+ "learnRate": 3.14, # Learn rate used for this iteration.
+ "durationMs": "A String", # Time taken to run the iteration in milliseconds.
+ "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
+ # refactoring if we want to use model-specific iteration results.
+ "arimaModelInfo": [ # This message is repeated because there are multiple arima models
+ # fitted in auto-arima. For non-auto-arima model, its size is one.
+ { # Arima model information.
+ "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
+ "aic": 3.14, # AIC.
+ "logLikelihood": 3.14, # Log-likelihood.
+ "variance": 3.14, # Variance.
+ },
+ "timeSeriesId": "A String", # The id to indicate different time series.
+ "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
+ "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
+ 3.14,
+ ],
+ "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
+ 3.14,
+ ],
+ "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
+ },
+ "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
+ # when d is not 1.
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
+ # for one time series.
+ "A String",
+ ],
+ "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
+ "q": "A String", # Order of the moving-average part.
+ "d": "A String", # Order of the differencing part.
+ "p": "A String", # Order of the autoregressive part.
+ },
+ },
+ ],
+ "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
+ # one time series.
+ "A String",
+ ],
+ },
+ "clusterInfos": [ # Information about top clusters for clustering models.
+ { # Information about a single cluster for clustering model.
+ "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
+ "centroidId": "A String", # Centroid id.
+ "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
+ # to each point assigned to the cluster.
+ },
+ ],
+ },
+ ],
+ "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
# end of training.
# data or just the eval data based on whether eval data was used during
# training. These are not present for imported models.
- "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
- "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
- "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
- "clusters": [ # [Beta] Information for all clusters.
+ "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "negativeLabel": "A String", # Label representing the negative class.
+ "positiveLabel": "A String", # Label representing the positive class.
+ "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
+ { # Confusion matrix for binary classification models.
+ "falseNegatives": "A String", # Number of true samples predicted as false.
+ "falsePositives": "A String", # Number of false samples predicted as true.
+ "trueNegatives": "A String", # Number of false samples predicted as false.
+ "f1Score": 3.14, # The equally weighted average of recall and precision.
+ "precision": 3.14, # The fraction of actual positive predictions that had positive actual
+ # labels.
+ "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.
+ "accuracy": 3.14, # The fraction of predictions given the correct label.
+ "truePositives": "A String", # Number of true samples predicted as true.
+ "recall": 3.14, # The fraction of actual positive labels that were given a positive
+ # prediction.
+ },
+ ],
+ },
+ "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
+ # factorization models.
+ # factorization models.
+ "meanSquaredError": 3.14, # Mean squared error.
+ "rSquared": 3.14, # R^2 score.
+ "medianAbsoluteError": 3.14, # Median absolute error.
+ "meanSquaredLogError": 3.14, # Mean squared log error.
+ "meanAbsoluteError": 3.14, # Mean absolute error.
+ },
+ "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
+ # models.
+ # feedback_type=implicit.
+ "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
+ # then averages all the precisions across all the users.
+ "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
+ # predicted confidence by comparing it to an ideal rank measured by the
+ # original ratings.
+ "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
+ # from the predicted confidence and dividing it by the original rank.
+ "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
+ # recommendation models except instead of computing the rating directly,
+ # the output from evaluate is computed against a preference which is 1 or 0
+ # depending on if the rating exists or not.
+ },
+ "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
+ "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
+ # models, the metrics are either macro-averaged or micro-averaged. When
+ # macro-averaged, the metrics are calculated for each label and then an
+ # unweighted average is taken of those values. When micro-averaged, the
+ # metric is calculated globally by counting the total number of correctly
+ # predicted rows.
+ "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
+ # positive prediction. For multiclass this is a macro-averaged metric.
+ "threshold": 3.14, # Threshold at which the metrics are computed. For binary
+ # classification models this is the positive class threshold.
+ # For multi-class classification models this is the confidence
+ # threshold.
+ "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
+ # metric.
+ "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
+ "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
+ # this is a macro-averaged metric.
+ "precision": 3.14, # Precision is the fraction of actual positive predictions that had
+ # positive actual labels. For multiclass this is a macro-averaged
+ # metric treating each class as a binary classifier.
+ "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
+ # multiclass this is a micro-averaged metric.
+ },
+ "confusionMatrixList": [ # Confusion matrix at different thresholds.
+ { # Confusion matrix for multi-class classification models.
+ "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
+ # confusion matrix.
+ "rows": [ # One row per actual label.
+ { # A single row in the confusion matrix.
+ "entries": [ # Info describing predicted label distribution.
+ { # A single entry in the confusion matrix.
+ "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
+ # also add an entry indicating the number of items under the
+ # confidence threshold.
+ "itemCount": "A String", # Number of items being predicted as this label.
+ },
+ ],
+ "actualLabel": "A String", # The original label of this row.
+ },
+ ],
+ },
+ ],
+ },
+ "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
+ "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
+ "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
+ "clusters": [ # [Beta] Information for all clusters.
{ # Message containing the information about one cluster.
- "count": "A String", # Count of training data rows that were assigned to this cluster.
- "featureValues": [ # Values of highly variant features for this cluster.
+ "count": "A String", # Count of training data rows that were assigned to this cluster.
+ "featureValues": [ # Values of highly variant features for this cluster.
{ # Representative value of a single feature within the cluster.
- "featureColumn": "A String", # The feature column name.
- "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
+ "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
# feature.
- "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
- "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
+ "featureColumn": "A String", # The feature column name.
+ "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
+ "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
# more than ten categories, we return top ten (by count) and return
- # one more CategoryCount with category "_OTHER_" and count as
+ # one more CategoryCount with category "_OTHER_" and count as
# aggregate counts of remaining categories.
{ # Represents the count of a single category within the cluster.
- "category": "A String", # The name of category.
- "count": "A String", # The count of training samples matching the category within the
+ "category": "A String", # The name of category.
+ "count": "A String", # The count of training samples matching the category within the
# cluster.
},
],
},
},
],
- "centroidId": "A String", # Centroid id.
- },
- ],
- },
- "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
- # factorization models.
- # factorization models.
- "meanSquaredLogError": 3.14, # Mean squared log error.
- "meanAbsoluteError": 3.14, # Mean absolute error.
- "meanSquaredError": 3.14, # Mean squared error.
- "medianAbsoluteError": 3.14, # Median absolute error.
- "rSquared": 3.14, # R^2 score.
- },
- "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
- # models.
- # feedback_type=implicit.
- "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
- # recommendation models except instead of computing the rating directly,
- # the output from evaluate is computed against a preference which is 1 or 0
- # depending on if the rating exists or not.
- "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
- # then averages all the precisions across all the users.
- "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
- # from the predicted confidence and dividing it by the original rank.
- "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
- # predicted confidence by comparing it to an ideal rank measured by the
- # original ratings.
- },
- "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
- "negativeLabel": "A String", # Label representing the negative class.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "positiveLabel": "A String", # Label representing the positive class.
- "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
- { # Confusion matrix for binary classification models.
- "truePositives": "A String", # Number of true samples predicted as true.
- "recall": 3.14, # The fraction of actual positive labels that were given a positive
- # prediction.
- "precision": 3.14, # The fraction of actual positive predictions that had positive actual
- # labels.
- "falseNegatives": "A String", # Number of false samples predicted as false.
- "trueNegatives": "A String", # Number of true samples predicted as false.
- "falsePositives": "A String", # Number of false samples predicted as true.
- "f1Score": 3.14, # The equally weighted average of recall and precision.
- "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
- "accuracy": 3.14, # The fraction of predictions given the correct label.
- },
- ],
- },
- "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "confusionMatrixList": [ # Confusion matrix at different thresholds.
- { # Confusion matrix for multi-class classification models.
- "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
- # confusion matrix.
- "rows": [ # One row per actual label.
- { # A single row in the confusion matrix.
- "actualLabel": "A String", # The original label of this row.
- "entries": [ # Info describing predicted label distribution.
- { # A single entry in the confusion matrix.
- "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
- # also add an entry indicating the number of items under the
- # confidence threshold.
- "itemCount": "A String", # Number of items being predicted as this label.
- },
- ],
- },
- ],
+ "centroidId": "A String", # Centroid id.
},
],
},
},
- "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
- # actually split.
- # data tables that were used to train the model.
- "trainingTable": { # Table reference of the training data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- "evaluationTable": { # Table reference of the evaluation data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- },
- "results": [ # Output of each iteration run, results.size() <= max_iterations.
- { # Information about a single iteration of the training run.
- "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
- # refactoring if we want to use model-specific iteration results.
- "arimaModelInfo": [ # This message is repeated because there are multiple arima models
- # fitted in auto-arima. For non-auto-arima model, its size is one.
- { # Arima model information.
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
- # for one time series.
- "A String",
- ],
- "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
- # when d is not 1.
- "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
- "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
- 3.14,
- ],
- "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
- 3.14,
- ],
- "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
- },
- "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
- "q": "A String", # Order of the moving-average part.
- "p": "A String", # Order of the autoregressive part.
- "d": "A String", # Order of the differencing part.
- },
- "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
- "variance": 3.14, # Variance.
- "logLikelihood": 3.14, # Log-likelihood.
- "aic": 3.14, # AIC.
- },
- "timeSeriesId": "A String", # The id to indicate different time series.
- },
- ],
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
- # one time series.
- "A String",
- ],
- },
- "index": 42, # Index of the iteration, 0 based.
- "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
- "durationMs": "A String", # Time taken to run the iteration in milliseconds.
- "learnRate": 3.14, # Learn rate used for this iteration.
- "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
- "clusterInfos": [ # Information about top clusters for clustering models.
- { # Information about a single cluster for clustering model.
- "centroidId": "A String", # Centroid id.
- "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
- "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
- # to each point assigned to the cluster.
- },
- ],
- },
- ],
- "startTime": "A String", # The start time of this training run.
- "trainingOptions": { # Options that were used for this training run, includes
+ "trainingOptions": { # Options that were used for this training run, includes
# user specified and default options that were used.
- "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
- "itemColumn": "A String", # Item column specified for matrix factorization models.
- "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
- # factorization.
- "numFactors": "A String", # Num factors specified for matrix factorization models.
- "inputLabelColumns": [ # Name of input label columns in training data.
- "A String",
- ],
- "batchSize": "A String", # Batch size for dnn models.
- "distanceType": "A String", # Distance type for clustering models.
- "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
- # when kmeans_initialization_method is CUSTOM.
- "l2Regularization": 3.14, # L2 regularization coefficient.
- "dropout": 3.14, # Dropout probability for dnn models.
- "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
- # less than 'min_relative_progress'. Used only for iterative training
- # algorithms.
- "l1Regularization": 3.14, # L1 regularization coefficient.
- "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
- # training algorithms.
- "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
+ "dropout": 3.14, # Dropout probability for dnn models.
+ "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
+ "labelClassWeights": { # Weights associated with each label class, for rebalancing the
+ # training data. Only applicable for classification models.
+ "a_key": 3.14,
+ },
+ "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
+ # overfitting for boosted tree models.
+ "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
# any more (compared to min_relative_progress). Used only for iterative
# training algorithms.
- "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
+ "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
+ # of data will be used as training data. The format should be double.
+ # Accurate to two decimal places.
+ # Default value is 0.2.
+ "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
# strategy.
- "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
+ "itemColumn": "A String", # Item column specified for matrix factorization models.
+ "inputLabelColumns": [ # Name of input label columns in training data.
+ "A String",
+ ],
+ "warmStart": True or False, # Whether to train a model from the last checkpoint.
+ "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
+ "numFactors": "A String", # Num factors specified for matrix factorization models.
+ "lossType": "A String", # Type of loss function used during training run.
+ "hiddenUnits": [ # Hidden units for dnn models.
+ "A String",
+ ],
+ "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
+ "l1Regularization": 3.14, # L1 regularization coefficient.
+ "distanceType": "A String", # Distance type for clustering models.
+ "walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is
+ # specified.
+ "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
+ # factorization.
+ "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
+ "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
# feature.
# 1. When data_split_method is CUSTOM, the corresponding column should
# be boolean. The rows with true value tag are eval data, and the false
@@ -1168,488 +1602,54 @@
# as training data, and the rest are eval data. It respects the order
# in Orderable data types:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- "numClusters": "A String", # Number of clusters for clustering models.
- "warmStart": True or False, # Whether to train a model from the last checkpoint.
- "hiddenUnits": [ # Hidden units for dnn models.
- "A String",
- ],
- "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
- "userColumn": "A String", # User column specified for matrix factorization models.
- "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
- "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
- "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
- # of data will be used as training data. The format should be double.
- # Accurate to two decimal places.
- # Default value is 0.2.
- "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
- "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
- # overfitting for boosted tree models.
- "labelClassWeights": { # Weights associated with each label class, for rebalancing the
- # training data. Only applicable for classification models.
- "a_key": 3.14,
- },
- "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
- "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
+ "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
+ # training algorithms.
+ "userColumn": "A String", # User column specified for matrix factorization models.
+ "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
+ "l2Regularization": 3.14, # L2 regularization coefficient.
+ "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
# applicable for imported models.
- "walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is
- # specified.
- "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
- "lossType": "A String", # Type of loss function used during training run.
+ "batchSize": "A String", # Batch size for dnn models.
+ "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
+ # less than 'min_relative_progress'. Used only for iterative training
+ # algorithms.
+ "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
+ # when kmeans_initialization_method is CUSTOM.
+ "numClusters": "A String", # Number of clusters for clustering models.
+ "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
+ "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
},
- },
- ],
- "featureColumns": [ # Output only. Input feature columns that were used to train this model.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
+ "dataSplitResult": { # Data split result of the training run. Only set when the input data is
+ # actually split. This contains references to the training and evaluation
+ # data tables that were used to train the model.
+ "trainingTable": { # Table reference of the training data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
},
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "labelColumns": [ # Output only. Label columns that were used to train this model.
- # The output of the model will have a "predicted_" prefix to these columns.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
+ "evaluationTable": { # Table reference of the evaluation data after split.
+ "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this table.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this table.
},
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
},
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
},
],
- "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
- "modelType": "A String", # Output only. Type of the model resource.
- "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
+ "modelReference": { # Required. Unique identifier for this model.
+ "datasetId": "A String", # [Required] The ID of the dataset containing this model.
+ "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
+ "projectId": "A String", # [Required] The ID of the project containing this model.
+ },
+ "description": "A String", # Optional. A user-friendly description of this model.
+ "etag": "A String", # Output only. A hash of this resource.
+ "creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.
+ "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
# encryption configuration of the model data while stored in BigQuery
# storage. This field can be used with PatchModel to update encryption key
# for an already encrypted model.
- "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
+ "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
},
- "modelReference": { # Required. Unique identifier for this model.
- "projectId": "A String", # [Required] The ID of the project containing this model.
- "datasetId": "A String", # [Required] The ID of the dataset containing this model.
- "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- },
- "etag": "A String", # Output only. A hash of this resource.
- "location": "A String", # Output only. The geographic location where the model resides. This value
- # is inherited from the dataset.
- "friendlyName": "A String", # Optional. A descriptive name for this model.
- "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
- # If not present, the model will persist indefinitely. Expired models
- # will be deleted and their storage reclaimed. The defaultTableExpirationMs
- # property of the encapsulating dataset can be used to set a default
- # expirationTime on newly created models.
- "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.
- }
-
-
-Returns:
- An object of the form:
-
- {
- "labels": { # The labels associated with this model. You can use these to organize
- # and group your models. Label keys and values can be no longer
- # than 63 characters, can only contain lowercase letters, numeric
- # characters, underscores and dashes. International characters are allowed.
- # Label values are optional. Label keys must start with a letter and each
- # label in the list must have a different key.
- "a_key": "A String",
- },
- "description": "A String", # Optional. A user-friendly description of this model.
- "trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.
- { # Information about a single training query run for the model.
- "evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the
- # end of training.
- # data or just the eval data based on whether eval data was used during
- # training. These are not present for imported models.
- "clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.
- "meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.
- "daviesBouldinIndex": 3.14, # Davies-Bouldin index.
- "clusters": [ # [Beta] Information for all clusters.
- { # Message containing the information about one cluster.
- "count": "A String", # Count of training data rows that were assigned to this cluster.
- "featureValues": [ # Values of highly variant features for this cluster.
- { # Representative value of a single feature within the cluster.
- "featureColumn": "A String", # The feature column name.
- "numericalValue": 3.14, # The numerical feature value. This is the centroid value for this
- # feature.
- "categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.
- "categoryCounts": [ # Counts of all categories for the categorical feature. If there are
- # more than ten categories, we return top ten (by count) and return
- # one more CategoryCount with category "_OTHER_" and count as
- # aggregate counts of remaining categories.
- { # Represents the count of a single category within the cluster.
- "category": "A String", # The name of category.
- "count": "A String", # The count of training samples matching the category within the
- # cluster.
- },
- ],
- },
- },
- ],
- "centroidId": "A String", # Centroid id.
- },
- ],
- },
- "regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix
- # factorization models.
- # factorization models.
- "meanSquaredLogError": 3.14, # Mean squared log error.
- "meanAbsoluteError": 3.14, # Mean absolute error.
- "meanSquaredError": 3.14, # Mean squared error.
- "medianAbsoluteError": 3.14, # Median absolute error.
- "rSquared": 3.14, # R^2 score.
- },
- "rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization
- # models.
- # feedback_type=implicit.
- "meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit
- # recommendation models except instead of computing the rating directly,
- # the output from evaluate is computed against a preference which is 1 or 0
- # depending on if the rating exists or not.
- "meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and
- # then averages all the precisions across all the users.
- "averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank
- # from the predicted confidence and dividing it by the original rank.
- "normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the
- # predicted confidence by comparing it to an ideal rank measured by the
- # original ratings.
- },
- "binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.
- "negativeLabel": "A String", # Label representing the negative class.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "positiveLabel": "A String", # Label representing the positive class.
- "binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.
- { # Confusion matrix for binary classification models.
- "truePositives": "A String", # Number of true samples predicted as true.
- "recall": 3.14, # The fraction of actual positive labels that were given a positive
- # prediction.
- "precision": 3.14, # The fraction of actual positive predictions that had positive actual
- # labels.
- "falseNegatives": "A String", # Number of false samples predicted as false.
- "trueNegatives": "A String", # Number of true samples predicted as false.
- "falsePositives": "A String", # Number of false samples predicted as true.
- "f1Score": 3.14, # The equally weighted average of recall and precision.
- "positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.
- "accuracy": 3.14, # The fraction of predictions given the correct label.
- },
- ],
- },
- "multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.
- "aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.
- # models, the metrics are either macro-averaged or micro-averaged. When
- # macro-averaged, the metrics are calculated for each label and then an
- # unweighted average is taken of those values. When micro-averaged, the
- # metric is calculated globally by counting the total number of correctly
- # predicted rows.
- "recall": 3.14, # Recall is the fraction of actual positive labels that were given a
- # positive prediction. For multiclass this is a macro-averaged metric.
- "precision": 3.14, # Precision is the fraction of actual positive predictions that had
- # positive actual labels. For multiclass this is a macro-averaged
- # metric treating each class as a binary classifier.
- "logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.
- "threshold": 3.14, # Threshold at which the metrics are computed. For binary
- # classification models this is the positive class threshold.
- # For multi-class classfication models this is the confidence
- # threshold.
- "accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For
- # multiclass this is a micro-averaged metric.
- "f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass
- # this is a macro-averaged metric.
- "rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged
- # metric.
- },
- "confusionMatrixList": [ # Confusion matrix at different thresholds.
- { # Confusion matrix for multi-class classification models.
- "confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the
- # confusion matrix.
- "rows": [ # One row per actual label.
- { # A single row in the confusion matrix.
- "actualLabel": "A String", # The original label of this row.
- "entries": [ # Info describing predicted label distribution.
- { # A single entry in the confusion matrix.
- "predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will
- # also add an entry indicating the number of items under the
- # confidence threshold.
- "itemCount": "A String", # Number of items being predicted as this label.
- },
- ],
- },
- ],
- },
- ],
- },
- },
- "dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is
- # actually split.
- # data tables that were used to train the model.
- "trainingTable": { # Table reference of the training data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- "evaluationTable": { # Table reference of the evaluation data after split.
- "projectId": "A String", # [Required] The ID of the project containing this table.
- "tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- "datasetId": "A String", # [Required] The ID of the dataset containing this table.
- },
- },
- "results": [ # Output of each iteration run, results.size() <= max_iterations.
- { # Information about a single iteration of the training run.
- "arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
- # refactoring if we want to use model-specific iteration results.
- "arimaModelInfo": [ # This message is repeated because there are multiple arima models
- # fitted in auto-arima. For non-auto-arima model, its size is one.
- { # Arima model information.
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported
- # for one time series.
- "A String",
- ],
- "hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false
- # when d is not 1.
- "arimaCoefficients": { # Arima coefficients. # Arima coefficients.
- "movingAverageCoefficients": [ # Moving-average coefficients, an array of double.
- 3.14,
- ],
- "autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.
- 3.14,
- ],
- "interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.
- },
- "nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.
- "q": "A String", # Order of the moving-average part.
- "p": "A String", # Order of the autoregressive part.
- "d": "A String", # Order of the differencing part.
- },
- "arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.
- "variance": 3.14, # Variance.
- "logLikelihood": 3.14, # Log-likelihood.
- "aic": 3.14, # AIC.
- },
- "timeSeriesId": "A String", # The id to indicate different time series.
- },
- ],
- "seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for
- # one time series.
- "A String",
- ],
- },
- "index": 42, # Index of the iteration, 0 based.
- "evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.
- "durationMs": "A String", # Time taken to run the iteration in milliseconds.
- "learnRate": 3.14, # Learn rate used for this iteration.
- "trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.
- "clusterInfos": [ # Information about top clusters for clustering models.
- { # Information about a single cluster for clustering model.
- "centroidId": "A String", # Centroid id.
- "clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.
- "clusterRadius": 3.14, # Cluster radius, the average distance from centroid
- # to each point assigned to the cluster.
- },
- ],
- },
- ],
- "startTime": "A String", # The start time of this training run.
- "trainingOptions": { # Options that were used for this training run, includes
- # user specified and default options that were used.
- "optimizationStrategy": "A String", # Optimization strategy for training linear regression models.
- "itemColumn": "A String", # Item column specified for matrix factorization models.
- "feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix
- # factorization.
- "numFactors": "A String", # Num factors specified for matrix factorization models.
- "inputLabelColumns": [ # Name of input label columns in training data.
- "A String",
- ],
- "batchSize": "A String", # Batch size for dnn models.
- "distanceType": "A String", # Distance type for clustering models.
- "kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm
- # when kmeans_initialization_method is CUSTOM.
- "l2Regularization": 3.14, # L2 regularization coefficient.
- "dropout": 3.14, # Dropout probability for dnn models.
- "minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is
- # less than 'min_relative_progress'. Used only for iterative training
- # algorithms.
- "l1Regularization": 3.14, # L1 regularization coefficient.
- "maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative
- # training algorithms.
- "earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly
- # any more (compared to min_relative_progress). Used only for iterative
- # training algorithms.
- "initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate
- # strategy.
- "dataSplitColumn": "A String", # The column to split data with. This column won't be used as a
- # feature.
- # 1. When data_split_method is CUSTOM, the corresponding column should
- # be boolean. The rows with true value tag are eval data, and the false
- # are training data.
- # 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
- # rows (from smallest to largest) in the corresponding column are used
- # as training data, and the rest are eval data. It respects the order
- # in Orderable data types:
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- "numClusters": "A String", # Number of clusters for clustering models.
- "warmStart": True or False, # Whether to train a model from the last checkpoint.
- "hiddenUnits": [ # Hidden units for dnn models.
- "A String",
- ],
- "maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.
- "userColumn": "A String", # User column specified for matrix factorization models.
- "kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.
- "learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.
- "dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest
- # of data will be used as training data. The format should be double.
- # Accurate to two decimal places.
- # Default value is 0.2.
- "dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.
- "subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent
- # overfitting for boosted tree models.
- "labelClassWeights": { # Weights associated with each label class, for rebalancing the
- # training data. Only applicable for classification models.
- "a_key": 3.14,
- },
- "learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.
- "modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only
- # applicable for imported models.
-            "walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is
- # specified.
- "minSplitLoss": 3.14, # Minimum split loss for boosted tree models.
- "lossType": "A String", # Type of loss function used during training run.
- },
- },
- ],
- "featureColumns": [ # Output only. Input feature columns that were used to train this model.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
- "labelColumns": [ # Output only. Label columns that were used to train this model.
- # The output of the model will have a "predicted_" prefix to these columns.
- { # A field or a column.
- "type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly
- # specified (e.g., CREATE FUNCTION statement can omit the return type;
- # in this case the output parameter does not have this "type" field).
- # Examples:
- # INT64: {type_kind="INT64"}
- # ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
- # STRUCT<x STRING, y ARRAY<DATE>>:
- # {type_kind="STRUCT",
- # struct_type={fields=[
- # {name="x", type={type_kind="STRING"}},
- # {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
- # ]}}
- "structType": { # The fields of this struct, in order, if type_kind = "STRUCT".
- "fields": [
- # Object with schema name: StandardSqlField
- ],
- },
- "arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".
- "typeKind": "A String", # Required. The top level type of this field.
- # Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- },
- "name": "A String", # Optional. The name of this field. Can be absent for struct fields.
- },
- ],
-    "creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.
- "modelType": "A String", # Output only. Type of the model resource.
- "encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the
- # encryption configuration of the model data while stored in BigQuery
- # storage. This field can be used with PatchModel to update encryption key
- # for an already encrypted model.
- "kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.
- },
- "modelReference": { # Required. Unique identifier for this model.
- "projectId": "A String", # [Required] The ID of the project containing this model.
- "datasetId": "A String", # [Required] The ID of the dataset containing this model.
- "modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.
- },
- "etag": "A String", # Output only. A hash of this resource.
- "location": "A String", # Output only. The geographic location where the model resides. This value
- # is inherited from the dataset.
- "friendlyName": "A String", # Optional. A descriptive name for this model.
- "expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.
- # If not present, the model will persist indefinitely. Expired models
- # will be deleted and their storage reclaimed. The defaultTableExpirationMs
- # property of the encapsulating dataset can be used to set a default
- # expirationTime on newly created models.
-    "lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.
- }</pre>
+ }</pre>
</div>
</body></html>
\ No newline at end of file