Blame - docs/dyn/bigquery_v2.models.html - platform/external/python/google-api-python-client

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

129

# end of training.

130

# data or just the eval data based on whether eval data was used during

131

# training. These are not present for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

132

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

133

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

134

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

135

"clusters": [ # [Beta] Information for all clusters.

136

{ # Message containing the information about one cluster.

137

"count": "A String", # Count of training data rows that were assigned to this cluster.

138

"featureValues": [ # Values of highly variant features for this cluster.

139

{ # Representative value of a single feature within the cluster.

140

"featureColumn": "A String", # The feature column name.

141

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

142

# feature.

143

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

144

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

145

# more than ten categories, we return top ten (by count) and return

146

# one more CategoryCount with category "_OTHER_" and count as

147

# aggregate counts of remaining categories.

148

{ # Represents the count of a single category within the cluster.

149

"category": "A String", # The name of the category.

150

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

},

],

"centroidId": "A String", # Centroid id.

158

},

159

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

160

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

161

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

162

# factorization models.

163

# factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

164

"meanSquaredLogError": 3.14, # Mean squared log error.

165

"meanAbsoluteError": 3.14, # Mean absolute error.

166

"meanSquaredError": 3.14, # Mean squared error.

167

"medianAbsoluteError": 3.14, # Median absolute error.

168

"rSquared": 3.14, # R^2 score.

169

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

170

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

171

# models.

172

# feedback_type=implicit.

173

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

174

# recommendation models except instead of computing the rating directly,

175

# the output from evaluate is computed against a preference which is 1 or 0

176

# depending on if the rating exists or not.

177

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

178

# then averages all the precisions across all the users.

179

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

180

# from the predicted confidence and dividing it by the original rank.

181

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

182

# predicted confidence by comparing it to an ideal rank measured by the

183

# original ratings.

184

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

185

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

186

"negativeLabel": "A String", # Label representing the negative class.

187

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

188

# models, the metrics are either macro-averaged or micro-averaged. When

189

# macro-averaged, the metrics are calculated for each label and then an

190

# unweighted average is taken of those values. When micro-averaged, the

191

# metric is calculated globally by counting the total number of correctly

192

# predicted rows.

193

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

194

# positive prediction. For multiclass this is a macro-averaged metric.

195

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

196

# positive actual labels. For multiclass this is a macro-averaged

197

# metric treating each class as a binary classifier.

198

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

199

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

200

# classification models this is the positive class threshold.

201

# For multi-class classification models this is the confidence

202

# threshold.

203

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

204

# multiclass this is a micro-averaged metric.

205

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

206

# this is a macro-averaged metric.

207

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

208

# metric.

209

},

210

"positiveLabel": "A String", # Label representing the positive class.

211

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

212

{ # Confusion matrix for binary classification models.

213

"truePositives": "A String", # Number of true samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

214

"recall": 3.14, # The fraction of actual positive labels that were given a positive

215

# prediction.

216

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

217

# labels.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

218

"falseNegatives": "A String", # Number of true samples predicted as false.

219

"trueNegatives": "A String", # Number of false samples predicted as false.

220

"falsePositives": "A String", # Number of false samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

221

"f1Score": 3.14, # The equally weighted average of recall and precision.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

222

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

223

"accuracy": 3.14, # The fraction of predictions given the correct label.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

228

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

229

# models, the metrics are either macro-averaged or micro-averaged. When

230

# macro-averaged, the metrics are calculated for each label and then an

231

# unweighted average is taken of those values. When micro-averaged, the

232

# metric is calculated globally by counting the total number of correctly

233

# predicted rows.

234

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

235

# positive prediction. For multiclass this is a macro-averaged metric.

236

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

237

# positive actual labels. For multiclass this is a macro-averaged

238

# metric treating each class as a binary classifier.

239

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

240

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

241

# classification models this is the positive class threshold.

242

# For multi-class classification models this is the confidence

243

# threshold.

244

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

245

# multiclass this is a micro-averaged metric.

246

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

247

# this is a macro-averaged metric.

248

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

249

# metric.

250

},

251

"confusionMatrixList": [ # Confusion matrix at different thresholds.

252

{ # Confusion matrix for multi-class classification models.

253

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

254

# confusion matrix.

255

"rows": [ # One row per actual label.

256

{ # A single row in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

257

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

258

"entries": [ # Info describing predicted label distribution.

259

{ # A single entry in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

260

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

261

# also add an entry indicating the number of items under the

262

# confidence threshold.

263

"itemCount": "A String", # Number of items being predicted as this label.

264

},

265

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

],

},

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

272

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

273

# actually split.

274

# data tables that were used to train the model.

275

"trainingTable": { # Table reference of the training data after split.

276

"projectId": "A String", # [Required] The ID of the project containing this table.

277

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

278

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

279

},

280

"evaluationTable": { # Table reference of the evaluation data after split.

281

"projectId": "A String", # [Required] The ID of the project containing this table.

282

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

283

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

284

},

285

},

286

"results": [ # Output of each iteration run, results.size() <= max_iterations.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

287

{ # Information about a single iteration of the training run.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

288

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

289

# refactoring if we want to use model-specific iteration results.

290

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

291

# fitted in auto-arima. For non-auto-arima model, its size is one.

292

{ # Arima model information.

293

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

294

# for one time series.

295

"A String",

296

],

297

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

298

# when d is not 1.

299

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

300

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

301

3.14,

302

],

303

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

304

3.14,

305

],

306

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

307

},

308

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

309

"q": "A String", # Order of the moving-average part.

310

"p": "A String", # Order of the autoregressive part.

311

"d": "A String", # Order of the differencing part.

312

},

313

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

314

"variance": 3.14, # Variance.

315

"logLikelihood": 3.14, # Log-likelihood.

316

"aic": 3.14, # AIC.

317

},

318

"timeSeriesId": "A String", # The id to indicate different time series.

319

},

320

],

321

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

326

"index": 42, # Index of the iteration, 0 based.

327

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

328

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

329

"learnRate": 3.14, # Learn rate used for this iteration.

330

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

331

"clusterInfos": [ # Information about top clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

332

{ # Information about a single cluster for clustering model.

333

"centroidId": "A String", # Centroid id.

334

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

335

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

336

# to each point assigned to the cluster.

},

],

},

],

"startTime": "A String", # The start time of this training run.

342

"trainingOptions": { # Options that were used for this training run, includes

343

# user specified and default options that were used.

344

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

345

"itemColumn": "A String", # Item column specified for matrix factorization models.

346

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

347

# factorization.

348

"numFactors": "A String", # Num factors specified for matrix factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

349

"inputLabelColumns": [ # Name of input label columns in training data.

350

"A String",

351

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

352

"batchSize": "A String", # Batch size for dnn models.

353

"distanceType": "A String", # Distance type for clustering models.

354

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

355

# when kmeans_initialization_method is CUSTOM.

356

"l2Regularization": 3.14, # L2 regularization coefficient.

357

"dropout": 3.14, # Dropout probability for dnn models.

358

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

359

# less than 'min_relative_progress'. Used only for iterative training

360

# algorithms.

361

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

362

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

363

# training algorithms.

364

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

365

# any more (compared to min_relative_progress). Used only for iterative

366

# training algorithms.

367

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

368

# strategy.

369

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

370

# feature.

371

# 1. When data_split_method is CUSTOM, the corresponding column should

372

# be boolean. The rows with true value tag are eval data, and the false

373

# are training data.

374

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

375

# rows (from smallest to largest) in the corresponding column are used

376

# as training data, and the rest are eval data. It respects the order

377

# in Orderable data types:

378

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

379

"numClusters": "A String", # Number of clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

380

"warmStart": True or False, # Whether to train a model from the last checkpoint.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

381

"hiddenUnits": [ # Hidden units for dnn models.

382

"A String",

383

],

384

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

385

"userColumn": "A String", # User column specified for matrix factorization models.

386

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

387

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

388

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

389

# of data will be used as training data. The format should be double.

390

# Accurate to two decimal places.

391

# Default value is 0.2.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

392

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

393

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

394

# overfitting for boosted tree models.

395

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

396

# training data. Only applicable for classification models.

397

"a_key": 3.14,

398

},

399

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

400

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

401

# applicable for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

402

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

403

# specified.

404

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

405

"lossType": "A String", # Type of loss function used during training run.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

},

],

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

410

{ # A field or a column.

411

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

412

# specified (e.g., CREATE FUNCTION statement can omit the return type;

413

# in this case the output parameter does not have this "type" field).

414

# Examples:

415

# INT64: {type_kind="INT64"}

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

416

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

417

# STRUCT<x STRING, y ARRAY<DATE>>:

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

418

# {type_kind="STRUCT",

419

# struct_type={fields=[

420

# {name="x", type={type_kind="STRING"}},

421

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

422

# ]}}

423

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

424

"fields": [

425

# Object with schema name: StandardSqlField

426

],

427

},

428

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

429

"typeKind": "A String", # Required. The top level type of this field.

430

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

431

},

432

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

433

},

434

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

435

"labelColumns": [ # Output only. Label columns that were used to train this model.

436

# The output of the model will have a "predicted_" prefix to these columns.

437

{ # A field or a column.

438

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

439

# specified (e.g., CREATE FUNCTION statement can omit the return type;

440

# in this case the output parameter does not have this "type" field).

441

# Examples:

442

# INT64: {type_kind="INT64"}

443

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

444

# STRUCT<x STRING, y ARRAY<DATE>>:

445

# {type_kind="STRUCT",

446

# struct_type={fields=[

447

# {name="x", type={type_kind="STRING"}},

448

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

449

# ]}}

450

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

451

"fields": [

452

# Object with schema name: StandardSqlField

453

],

454

},

455

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

456

"typeKind": "A String", # Required. The top level type of this field.

457

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

458

},

459

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

460

},

461

],

462

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

463

"modelType": "A String", # Output only. Type of the model resource.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

464

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

465

# encryption configuration of the model data while stored in BigQuery

466

# storage. This field can be used with PatchModel to update encryption key

467

# for an already encrypted model.

468

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

469

},

470

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

471

"projectId": "A String", # [Required] The ID of the project containing this model.

472

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

473

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

474

},

475

"etag": "A String", # Output only. A hash of this resource.

476

"location": "A String", # Output only. The geographic location where the model resides. This value

477

# is inherited from the dataset.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

478

"friendlyName": "A String", # Optional. A descriptive name for this model.

479

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

480

# If not present, the model will persist indefinitely. Expired models

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

481

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

482

# property of the encapsulating dataset can be used to set a default

483

# expirationTime on newly created models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

484

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

}</pre>

</div>

<code class="details" id="list">list(projectId, datasetId, pageToken=None, maxResults=None)</code>

490

<pre>Lists all models in the specified dataset. Requires the READER dataset

491

role.

492

493

Args:

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

494

projectId: string, Required. Project ID of the models to list. (required)

495

datasetId: string, Required. Dataset ID of the models to list. (required)

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

496

pageToken: string, Page token, returned by a previous call to request the next page of

497

results

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

498

maxResults: integer, The maximum number of results to return in a single response page.

499

Leverage the page tokens to iterate through the entire collection.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

500

501

Returns:

502

An object of the form:

503

504

{

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

505

"nextPageToken": "A String", # A token to request the next page of results.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

506

"models": [ # Models in the requested dataset. Only the following fields are populated:

507

# model_reference, model_type, creation_time, last_modified_time and

508

# labels.

509

{

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

510

"labels": { # The labels associated with this model. You can use these to organize

511

# and group your models. Label keys and values can be no longer

512

# than 63 characters, can only contain lowercase letters, numeric

513

# characters, underscores and dashes. International characters are allowed.

514

# Label values are optional. Label keys must start with a letter and each

515

# label in the list must have a different key.

516

"a_key": "A String",

517

},

518

"description": "A String", # Optional. A user-friendly description of this model.

519

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

520

{ # Information about a single training query run for the model.

521

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

522

# end of training.

523

# data or just the eval data based on whether eval data was used during

524

# training. These are not present for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

525

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

526

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

527

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

528

"clusters": [ # [Beta] Information for all clusters.

529

{ # Message containing the information about one cluster.

530

"count": "A String", # Count of training data rows that were assigned to this cluster.

531

"featureValues": [ # Values of highly variant features for this cluster.

532

{ # Representative value of a single feature within the cluster.

533

"featureColumn": "A String", # The feature column name.

534

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

535

# feature.

536

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

537

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

538

# more than ten categories, we return top ten (by count) and return

539

# one more CategoryCount with category "_OTHER_" and count as

540

# aggregate counts of remaining categories.

541

{ # Represents the count of a single category within the cluster.

542

"category": "A String", # The name of the category.

543

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

},

],

"centroidId": "A String", # Centroid id.

551

},

552

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

553

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

554

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

555

# factorization models.

556

# factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

557

"meanSquaredLogError": 3.14, # Mean squared log error.

558

"meanAbsoluteError": 3.14, # Mean absolute error.

559

"meanSquaredError": 3.14, # Mean squared error.

560

"medianAbsoluteError": 3.14, # Median absolute error.

561

"rSquared": 3.14, # R^2 score.

562

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

563

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

564

# models.

565

# feedback_type=implicit.

566

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

567

# recommendation models except instead of computing the rating directly,

568

# the output from evaluate is computed against a preference which is 1 or 0

569

# depending on if the rating exists or not.

570

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

571

# then averages all the precisions across all the users.

572

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

573

# from the predicted confidence and dividing it by the original rank.

574

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

575

# predicted confidence by comparing it to an ideal rank measured by the

576

# original ratings.

577

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

578

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

579

"negativeLabel": "A String", # Label representing the negative class.

580

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

581

# models, the metrics are either macro-averaged or micro-averaged. When

582

# macro-averaged, the metrics are calculated for each label and then an

583

# unweighted average is taken of those values. When micro-averaged, the

584

# metric is calculated globally by counting the total number of correctly

585

# predicted rows.

586

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

587

# positive prediction. For multiclass this is a macro-averaged metric.

588

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

589

# positive actual labels. For multiclass this is a macro-averaged

590

# metric treating each class as a binary classifier.

591

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

592

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

593

# classification models this is the positive class threshold.

594

# For multi-class classification models this is the confidence

595

# threshold.

596

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

597

# multiclass this is a micro-averaged metric.

598

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

599

# this is a macro-averaged metric.

600

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

601

# metric.

602

},

603

"positiveLabel": "A String", # Label representing the positive class.

604

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

605

{ # Confusion matrix for binary classification models.

606

"truePositives": "A String", # Number of true samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

607

"recall": 3.14, # The fraction of actual positive labels that were given a positive

608

# prediction.

609

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

610

# labels.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

611

"falseNegatives": "A String", # Number of false samples predicted as false.

612

"trueNegatives": "A String", # Number of true samples predicted as false.

613

"falsePositives": "A String", # Number of false samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

614

"f1Score": 3.14, # The equally weighted average of recall and precision.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

615

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

616

"accuracy": 3.14, # The fraction of predictions given the correct label.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

621

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

622

# models, the metrics are either macro-averaged or micro-averaged. When

623

# macro-averaged, the metrics are calculated for each label and then an

624

# unweighted average is taken of those values. When micro-averaged, the

625

# metric is calculated globally by counting the total number of correctly

626

# predicted rows.

627

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

628

# positive prediction. For multiclass this is a macro-averaged metric.

629

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

630

# positive actual labels. For multiclass this is a macro-averaged

631

# metric treating each class as a binary classifier.

632

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

633

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

634

# classification models this is the positive class threshold.

635

# For multi-class classification models this is the confidence

636

# threshold.

637

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

638

# multiclass this is a micro-averaged metric.

639

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

640

# this is a macro-averaged metric.

641

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

642

# metric.

643

},

644

"confusionMatrixList": [ # Confusion matrix at different thresholds.

645

{ # Confusion matrix for multi-class classification models.

646

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

647

# confusion matrix.

648

"rows": [ # One row per actual label.

649

{ # A single row in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

650

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

651

"entries": [ # Info describing predicted label distribution.

652

{ # A single entry in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

653

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

654

# also add an entry indicating the number of items under the

655

# confidence threshold.

656

"itemCount": "A String", # Number of items being predicted as this label.

657

},

658

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

],

},

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

665

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

666

# actually split.

667

# data tables that were used to train the model.

668

"trainingTable": { # Table reference of the training data after split.

669

"projectId": "A String", # [Required] The ID of the project containing this table.

670

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

671

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

672

},

673

"evaluationTable": { # Table reference of the evaluation data after split.

674

"projectId": "A String", # [Required] The ID of the project containing this table.

675

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

676

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

677

},

678

},

679

"results": [ # Output of each iteration run, results.size() <= max_iterations.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

680

{ # Information about a single iteration of the training run.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

681

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

682

# refactoring if we want to use model-specific iteration results.

683

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

684

# fitted in auto-arima. For non-auto-arima model, its size is one.

685

{ # Arima model information.

686

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

687

# for one time series.

688

"A String",

689

],

690

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

691

# when d is not 1.

692

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

693

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

694

3.14,

695

],

696

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

697

3.14,

698

],

699

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

700

},

701

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

702

"q": "A String", # Order of the moving-average part.

703

"p": "A String", # Order of the autoregressive part.

704

"d": "A String", # Order of the differencing part.

705

},

706

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

707

"variance": 3.14, # Variance.

708

"logLikelihood": 3.14, # Log-likelihood.

709

"aic": 3.14, # AIC.

710

},

711

"timeSeriesId": "A String", # The id to indicate different time series.

712

},

713

],

714

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

719

"index": 42, # Index of the iteration, 0 based.

720

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

721

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

722

"learnRate": 3.14, # Learn rate used for this iteration.

723

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

724

"clusterInfos": [ # Information about top clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

725

{ # Information about a single cluster for clustering model.

726

"centroidId": "A String", # Centroid id.

727

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

728

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

729

# to each point assigned to the cluster.

},

],

},

],

"startTime": "A String", # The start time of this training run.

735

"trainingOptions": { # Options that were used for this training run, includes

736

# user specified and default options that were used.

737

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

738

"itemColumn": "A String", # Item column specified for matrix factorization models.

739

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

740

# factorization.

741

"numFactors": "A String", # Num factors specified for matrix factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

742

"inputLabelColumns": [ # Name of input label columns in training data.

743

"A String",

744

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

745

"batchSize": "A String", # Batch size for dnn models.

746

"distanceType": "A String", # Distance type for clustering models.

747

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

748

# when kmeans_initialization_method is CUSTOM.

749

"l2Regularization": 3.14, # L2 regularization coefficient.

750

"dropout": 3.14, # Dropout probability for dnn models.

751

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

752

# less than 'min_relative_progress'. Used only for iterative training

753

# algorithms.

754

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

755

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

756

# training algorithms.

757

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

758

# any more (compared to min_relative_progress). Used only for iterative

759

# training algorithms.

760

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

761

# strategy.

762

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

763

# feature.

764

# 1. When data_split_method is CUSTOM, the corresponding column should

765

# be boolean. The rows with true value tag are eval data, and the false

766

# are training data.

767

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

768

# rows (from smallest to largest) in the corresponding column are used

769

# as training data, and the rest are eval data. It respects the order

770

# in Orderable data types:

771

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

772

"numClusters": "A String", # Number of clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

773

"warmStart": True or False, # Whether to train a model from the last checkpoint.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

774

"hiddenUnits": [ # Hidden units for dnn models.

775

"A String",

776

],

777

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

778

"userColumn": "A String", # User column specified for matrix factorization models.

779

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

780

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

781

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

782

# of data will be used as training data. The format should be double.

783

# Accurate to two decimal places.

784

# Default value is 0.2.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

785

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

786

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

787

# overfitting for boosted tree models.

788

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

789

# training data. Only applicable for classification models.

790

"a_key": 3.14,

791

},

792

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

793

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

794

# applicable for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

795

"walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is

796

# specified.

797

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

798

"lossType": "A String", # Type of loss function used during training run.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

},

],

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

803

{ # A field or a column.

804

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

805

# specified (e.g., CREATE FUNCTION statement can omit the return type;

806

# in this case the output parameter does not have this "type" field).

807

# Examples:

808

# INT64: {type_kind="INT64"}

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

809

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

810

# STRUCT<x STRING, y ARRAY<DATE>>:

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

811

# {type_kind="STRUCT",

812

# struct_type={fields=[

813

# {name="x", type={type_kind="STRING"}},

814

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

815

# ]}}

816

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

817

"fields": [

818

# Object with schema name: StandardSqlField

819

],

820

},

821

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

822

"typeKind": "A String", # Required. The top level type of this field.

823

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

824

},

825

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

826

},

827

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

828

"labelColumns": [ # Output only. Label columns that were used to train this model.

829

# The output of the model will have a "predicted_" prefix to these columns.

830

{ # A field or a column.

831

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

832

# specified (e.g., CREATE FUNCTION statement can omit the return type;

833

# in this case the output parameter does not have this "type" field).

834

# Examples:

835

# INT64: {type_kind="INT64"}

836

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

837

# STRUCT<x STRING, y ARRAY<DATE>>:

838

# {type_kind="STRUCT",

839

# struct_type={fields=[

840

# {name="x", type={type_kind="STRING"}},

841

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

842

# ]}}

843

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

844

"fields": [

845

# Object with schema name: StandardSqlField

846

],

847

},

848

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

849

"typeKind": "A String", # Required. The top level type of this field.

850

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

851

},

852

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

853

},

854

],

855

"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

856

"modelType": "A String", # Output only. Type of the model resource.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

857

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

858

# encryption configuration of the model data while stored in BigQuery

859

# storage. This field can be used with PatchModel to update encryption key

860

# for an already encrypted model.

861

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

862

},

863

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

864

"projectId": "A String", # [Required] The ID of the project containing this model.

865

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

866

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

867

},

868

"etag": "A String", # Output only. A hash of this resource.

869

"location": "A String", # Output only. The geographic location where the model resides. This value

870

# is inherited from the dataset.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

871

"friendlyName": "A String", # Optional. A descriptive name for this model.

872

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

873

# If not present, the model will persist indefinitely. Expired models

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

874

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

875

# property of the encapsulating dataset can be used to set a default

876

# expirationTime on newly created models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

877

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

878

},

879

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

}</pre>

</div>

<code class="details" id="list_next">list_next(previous_request, previous_response)</code>

885

<pre>Retrieves the next page of results.

886

887

Args:

888

previous_request: The request for the previous page. (required)

889

previous_response: The response from the request for the previous page. (required)

890

891

Returns:

892

A request object that you can call 'execute()' on to request the next

893

page. Returns None if there are no more items in the collection.

</pre>

</div>

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

898

<code class="details" id="patch">patch(projectId, datasetId, modelId, body=None)</code>

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

899

<pre>Patch specific fields in the specified model.

900

901

Args:

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

902

projectId: string, Required. Project ID of the model to patch. (required)

903

datasetId: string, Required. Dataset ID of the model to patch. (required)

904

modelId: string, Required. Model ID of the model to patch. (required)

905

body: object, The request body.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

906

The object takes the form of:

907

908

{

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

909

"labels": { # The labels associated with this model. You can use these to organize

910

# and group your models. Label keys and values can be no longer

911

# than 63 characters, can only contain lowercase letters, numeric

912

# characters, underscores and dashes. International characters are allowed.

913

# Label values are optional. Label keys must start with a letter and each

914

# label in the list must have a different key.

915

"a_key": "A String",

916

},

917

"description": "A String", # Optional. A user-friendly description of this model.

918

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

919

{ # Information about a single training query run for the model.

920

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

921

# end of training.

922

# data or just the eval data based on whether eval data was used during

923

# training. These are not present for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

924

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

925

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

926

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

927

"clusters": [ # [Beta] Information for all clusters.

928

{ # Message containing the information about one cluster.

929

"count": "A String", # Count of training data rows that were assigned to this cluster.

930

"featureValues": [ # Values of highly variant features for this cluster.

931

{ # Representative value of a single feature within the cluster.

932

"featureColumn": "A String", # The feature column name.

933

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

934

# feature.

935

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

936

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

937

# more than ten categories, we return top ten (by count) and return

938

# one more CategoryCount with category "_OTHER_" and count as

939

# aggregate counts of remaining categories.

940

{ # Represents the count of a single category within the cluster.

941

"category": "A String", # The name of category.

942

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

},

],

"centroidId": "A String", # Centroid id.

950

},

951

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

952

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

953

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

954

# factorization models.

955

# factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

956

"meanSquaredLogError": 3.14, # Mean squared log error.

957

"meanAbsoluteError": 3.14, # Mean absolute error.

958

"meanSquaredError": 3.14, # Mean squared error.

959

"medianAbsoluteError": 3.14, # Median absolute error.

960

"rSquared": 3.14, # R^2 score.

961

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

962

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

963

# models.

964

# feedback_type=implicit.

965

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

966

# recommendation models except instead of computing the rating directly,

967

# the output from evaluate is computed against a preference which is 1 or 0

968

# depending on if the rating exists or not.

969

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

970

# then averages all the precisions across all the users.

971

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

972

# from the predicted confidence and dividing it by the original rank.

973

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

974

# predicted confidence by comparing it to an ideal rank measured by the

975

# original ratings.

976

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

977

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

978

"negativeLabel": "A String", # Label representing the negative class.

979

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

980

# models, the metrics are either macro-averaged or micro-averaged. When

981

# macro-averaged, the metrics are calculated for each label and then an

982

# unweighted average is taken of those values. When micro-averaged, the

983

# metric is calculated globally by counting the total number of correctly

984

# predicted rows.

985

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

986

# positive prediction. For multiclass this is a macro-averaged metric.

987

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

988

# positive actual labels. For multiclass this is a macro-averaged

989

# metric treating each class as a binary classifier.

990

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

991

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

992

# classification models this is the positive class threshold.

993

# For multi-class classification models this is the confidence

994

# threshold.

995

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

996

# multiclass this is a micro-averaged metric.

997

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

998

# this is a macro-averaged metric.

999

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1000

# metric.

1001

},

1002

"positiveLabel": "A String", # Label representing the positive class.

1003

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1004

{ # Confusion matrix for binary classification models.

1005

"truePositives": "A String", # Number of true samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1006

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1007

# prediction.

1008

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1009

# labels.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1010

"falseNegatives": "A String", # Number of false samples predicted as false.

1011

"trueNegatives": "A String", # Number of true samples predicted as false.

1012

"falsePositives": "A String", # Number of false samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1013

"f1Score": 3.14, # The equally weighted average of recall and precision.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1014

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1015

"accuracy": 3.14, # The fraction of predictions given the correct label.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1020

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1021

# models, the metrics are either macro-averaged or micro-averaged. When

1022

# macro-averaged, the metrics are calculated for each label and then an

1023

# unweighted average is taken of those values. When micro-averaged, the

1024

# metric is calculated globally by counting the total number of correctly

1025

# predicted rows.

1026

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1027

# positive prediction. For multiclass this is a macro-averaged metric.

1028

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1029

# positive actual labels. For multiclass this is a macro-averaged

1030

# metric treating each class as a binary classifier.

1031

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1032

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1033

# classification models this is the positive class threshold.

1034

# For multi-class classification models this is the confidence

1035

# threshold.

1036

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1037

# multiclass this is a micro-averaged metric.

1038

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1039

# this is a macro-averaged metric.

1040

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1041

# metric.

1042

},

1043

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1044

{ # Confusion matrix for multi-class classification models.

1045

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1046

# confusion matrix.

1047

"rows": [ # One row per actual label.

1048

{ # A single row in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1049

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1050

"entries": [ # Info describing predicted label distribution.

1051

{ # A single entry in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1052

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1053

# also add an entry indicating the number of items under the

1054

# confidence threshold.

1055

"itemCount": "A String", # Number of items being predicted as this label.

1056

},

1057

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

],

},

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1064

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1065

# actually split.

1066

# data tables that were used to train the model.

1067

"trainingTable": { # Table reference of the training data after split.

1068

"projectId": "A String", # [Required] The ID of the project containing this table.

1069

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

1070

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

1071

},

1072

"evaluationTable": { # Table reference of the evaluation data after split.

1073

"projectId": "A String", # [Required] The ID of the project containing this table.

1074

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

1075

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

1076

},

1077

},

1078

"results": [ # Output of each iteration run, results.size() <= max_iterations.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1079

{ # Information about a single iteration of the training run.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1080

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

1081

# refactoring if we want to use model-specific iteration results.

1082

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

1083

# fitted in auto-arima. For non-auto-arima model, its size is one.

1084

{ # Arima model information.

1085

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1086

# for one time series.

1087

"A String",

1088

],

1089

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

1090

# when d is not 1.

1091

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

1092

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

1093

3.14,

1094

],

1095

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

1096

3.14,

1097

],

1098

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

1099

},

1100

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

1101

"q": "A String", # Order of the moving-average part.

1102

"p": "A String", # Order of the autoregressive part.

1103

"d": "A String", # Order of the differencing part.

1104

},

1105

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

1106

"variance": 3.14, # Variance.

1107

"logLikelihood": 3.14, # Log-likelihood.

1108

"aic": 3.14, # AIC.

1109

},

1110

"timeSeriesId": "A String", # The id to indicate different time series.

1111

},

1112

],

1113

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1118

"index": 42, # Index of the iteration, 0 based.

1119

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

1120

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

1121

"learnRate": 3.14, # Learn rate used for this iteration.

1122

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1123

"clusterInfos": [ # Information about top clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1124

{ # Information about a single cluster for clustering model.

1125

"centroidId": "A String", # Centroid id.

1126

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

1127

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

1128

# to each point assigned to the cluster.

},

],

},

],

"startTime": "A String", # The start time of this training run.

1134

"trainingOptions": { # Options that were used for this training run, includes

1135

# user specified and default options that were used.

1136

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1137

"itemColumn": "A String", # Item column specified for matrix factorization models.

1138

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1139

# factorization.

1140

"numFactors": "A String", # Num factors specified for matrix factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1141

"inputLabelColumns": [ # Name of input label columns in training data.

1142

"A String",

1143

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1144

"batchSize": "A String", # Batch size for dnn models.

1145

"distanceType": "A String", # Distance type for clustering models.

1146

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1147

# when kmeans_initialization_method is CUSTOM.

1148

"l2Regularization": 3.14, # L2 regularization coefficient.

1149

"dropout": 3.14, # Dropout probability for dnn models.

1150

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1151

# less than 'min_relative_progress'. Used only for iterative training

1152

# algorithms.

1153

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1154

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1155

# training algorithms.

1156

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1157

# any more (compared to min_relative_progress). Used only for iterative

1158

# training algorithms.

1159

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1160

# strategy.

1161

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

1162

# feature.

1163

# 1. When data_split_method is CUSTOM, the corresponding column should

1164

# be boolean. The rows with true value tag are eval data, and the false

1165

# are training data.

1166

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1167

# rows (from smallest to largest) in the corresponding column are used

1168

# as training data, and the rest are eval data. It respects the order

1169

# in Orderable data types:

1170

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1171

"numClusters": "A String", # Number of clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1172

"warmStart": True or False, # Whether to train a model from the last checkpoint.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1173

"hiddenUnits": [ # Hidden units for dnn models.

1174

"A String",

1175

],

1176

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

1177

"userColumn": "A String", # User column specified for matrix factorization models.

1178

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

1179

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1180

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1181

# of data will be used as training data. The format should be double.

1182

# Accurate to two decimal places.

1183

# Default value is 0.2.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1184

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1185

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1186

# overfitting for boosted tree models.

1187

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1188

# training data. Only applicable for classification models.

1189

"a_key": 3.14,

1190

},

1191

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1192

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

1193

# applicable for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1194

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

1195

# specified.

1196

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

1197

"lossType": "A String", # Type of loss function used during training run.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

},

],

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

1202

{ # A field or a column.

1203

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1204

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1205

# in this case the output parameter does not have this "type" field).

1206

# Examples:

1207

# INT64: {type_kind="INT64"}

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1208

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1209

# STRUCT<x STRING, y ARRAY<DATE>>:

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1210

# {type_kind="STRUCT",

1211

# struct_type={fields=[

1212

# {name="x", type={type_kind="STRING"}},

1213

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1214

# ]}}

1215

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1216

"fields": [

1217

# Object with schema name: StandardSqlField

1218

],

1219

},

1220

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1221

"typeKind": "A String", # Required. The top level type of this field.

1222

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

1223

},

1224

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1225

},

1226

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1227

"labelColumns": [ # Output only. Label columns that were used to train this model.

1228

# The output of the model will have a "predicted_" prefix to these columns.

1229

{ # A field or a column.

1230

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1231

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1232

# in this case the output parameter does not have this "type" field).

1233

# Examples:

1234

# INT64: {type_kind="INT64"}

1235

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1236

# STRUCT<x STRING, y ARRAY<DATE>>:

1237

# {type_kind="STRUCT",

1238

# struct_type={fields=[

1239

# {name="x", type={type_kind="STRING"}},

1240

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1241

# ]}}

1242

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1243

"fields": [

1244

# Object with schema name: StandardSqlField

1245

],

1246

},

1247

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1248

"typeKind": "A String", # Required. The top level type of this field.

1249

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

1250

},

1251

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1252

},

1253

],

1254

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1255

"modelType": "A String", # Output only. Type of the model resource.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1256

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

1257

# encryption configuration of the model data while stored in BigQuery

1258

# storage. This field can be used with PatchModel to update encryption key

1259

# for an already encrypted model.

1260

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

1261

},

1262

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1263

"projectId": "A String", # [Required] The ID of the project containing this model.

1264

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1265

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1266

},

1267

"etag": "A String", # Output only. A hash of this resource.

1268

"location": "A String", # Output only. The geographic location where the model resides. This value

1269

# is inherited from the dataset.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1270

"friendlyName": "A String", # Optional. A descriptive name for this model.

1271

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

1272

# If not present, the model will persist indefinitely. Expired models

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1273

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

1274

# property of the encapsulating dataset can be used to set a default

1275

# expirationTime on newly created models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1276

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

}

Returns:

An object of the form:

1282

1283

{

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1284

"labels": { # The labels associated with this model. You can use these to organize

1285

# and group your models. Label keys and values can be no longer

1286

# than 63 characters, can only contain lowercase letters, numeric

1287

# characters, underscores and dashes. International characters are allowed.

1288

# Label values are optional. Label keys must start with a letter and each

1289

# label in the list must have a different key.

1290

"a_key": "A String",

1291

},

1292

"description": "A String", # Optional. A user-friendly description of this model.

1293

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1294

{ # Information about a single training query run for the model.

1295

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

1296

# end of training.

1297

# data or just the eval data based on whether eval data was used during

1298

# training. These are not present for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1299

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1300

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

1301

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1302

"clusters": [ # [Beta] Information for all clusters.

1303

{ # Message containing the information about one cluster.

1304

"count": "A String", # Count of training data rows that were assigned to this cluster.

1305

"featureValues": [ # Values of highly variant features for this cluster.

1306

{ # Representative value of a single feature within the cluster.

1307

"featureColumn": "A String", # The feature column name.

1308

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

1309

# feature.

1310

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

1311

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

1312

# more than ten categories, we return top ten (by count) and return

1313

# one more CategoryCount with category "_OTHER_" and count as

1314

# aggregate counts of remaining categories.

1315

{ # Represents the count of a single category within the cluster.

1316

"category": "A String", # The name of category.

1317

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

},

],

"centroidId": "A String", # Centroid id.

1325

},

1326

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1327

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1328

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

1329

# factorization models.

1330

# factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1331

"meanSquaredLogError": 3.14, # Mean squared log error.

1332

"meanAbsoluteError": 3.14, # Mean absolute error.

1333

"meanSquaredError": 3.14, # Mean squared error.

1334

"medianAbsoluteError": 3.14, # Median absolute error.

1335

"rSquared": 3.14, # R^2 score.

1336

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1337

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

1338

# models.

1339

# feedback_type=implicit.

1340

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

1341

# recommendation models except instead of computing the rating directly,

1342

# the output from evaluate is computed against a preference which is 1 or 0

1343

# depending on if the rating exists or not.

1344

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

1345

# then averages all the precisions across all the users.

1346

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

1347

# from the predicted confidence and dividing it by the original rank.

1348

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

1349

# predicted confidence by comparing it to an ideal rank measured by the

1350

# original ratings.

1351

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1352

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

1353

"negativeLabel": "A String", # Label representing the negative class.

1354

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1355

# models, the metrics are either macro-averaged or micro-averaged. When

1356

# macro-averaged, the metrics are calculated for each label and then an

1357

# unweighted average is taken of those values. When micro-averaged, the

1358

# metric is calculated globally by counting the total number of correctly

1359

# predicted rows.

1360

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1361

# positive prediction. For multiclass this is a macro-averaged metric.

1362

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1363

# positive actual labels. For multiclass this is a macro-averaged

1364

# metric treating each class as a binary classifier.

1365

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1366

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1367

# classification models this is the positive class threshold.

1368

# For multi-class classification models this is the confidence

1369

# threshold.

1370

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1371

# multiclass this is a micro-averaged metric.

1372

"f1Score": 3.14, # The F1 score is the harmonic mean of recall and precision. For multiclass

1373

# this is a macro-averaged metric.

1374

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1375

# metric.

1376

},

1377

"positiveLabel": "A String", # Label representing the positive class.

1378

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1379

{ # Confusion matrix for binary classification models.

1380

"truePositives": "A String", # Number of true samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1381

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1382

# prediction.

1383

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1384

# labels.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1385

"falseNegatives": "A String", # Number of true samples predicted as false.

1386

"trueNegatives": "A String", # Number of false samples predicted as false.

1387

"falsePositives": "A String", # Number of false samples predicted as true.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1388

"f1Score": 3.14, # The equally weighted harmonic mean of recall and precision.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1389

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1390

"accuracy": 3.14, # The fraction of predictions given the correct label.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1395

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1396

# models, the metrics are either macro-averaged or micro-averaged. When

1397

# macro-averaged, the metrics are calculated for each label and then an

1398

# unweighted average is taken of those values. When micro-averaged, the

1399

# metric is calculated globally by counting the total number of correctly

1400

# predicted rows.

1401

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1402

# positive prediction. For multiclass this is a macro-averaged metric.

1403

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1404

# positive actual labels. For multiclass this is a macro-averaged

1405

# metric treating each class as a binary classifier.

1406

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1407

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1408

# classification models this is the positive class threshold.

1409

# For multi-class classification models this is the confidence

1410

# threshold.

1411

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1412

# multiclass this is a micro-averaged metric.

1413

"f1Score": 3.14, # The F1 score is the harmonic mean of recall and precision. For multiclass

1414

# this is a macro-averaged metric.

1415

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1416

# metric.

1417

},

1418

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1419

{ # Confusion matrix for multi-class classification models.

1420

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1421

# confusion matrix.

1422

"rows": [ # One row per actual label.

1423

{ # A single row in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1424

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1425

"entries": [ # Info describing predicted label distribution.

1426

{ # A single entry in the confusion matrix.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1427

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1428

# also add an entry indicating the number of items under the

1429

# confidence threshold.

1430

"itemCount": "A String", # Number of items being predicted as this label.

1431

},

1432

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

],

},

],

},

},

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1439

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1440

# actually split.

1441

# data tables that were used to train the model.

1442

"trainingTable": { # Table reference of the training data after split.

1443

"projectId": "A String", # [Required] The ID of the project containing this table.

1444

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

1445

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

1446

},

1447

"evaluationTable": { # Table reference of the evaluation data after split.

1448

"projectId": "A String", # [Required] The ID of the project containing this table.

1449

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

1450

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

1451

},

1452

},

1453

"results": [ # Output of each iteration run, results.size() <= max_iterations.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1454

{ # Information about a single iteration of the training run.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1455

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

1456

# refactoring if we want to use model-specific iteration results.

1457

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

1458

# fitted in auto-arima. For non-auto-arima model, its size is one.

1459

{ # Arima model information.

1460

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1461

# for one time series.

1462

"A String",

1463

],

1464

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

1465

# when d is not 1.

1466

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

1467

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

1468

3.14,

1469

],

1470

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

1471

3.14,

1472

],

1473

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

1474

},

1475

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

1476

"q": "A String", # Order of the moving-average part.

1477

"p": "A String", # Order of the autoregressive part.

1478

"d": "A String", # Order of the differencing part.

1479

},

1480

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

1481

"variance": 3.14, # Variance.

1482

"logLikelihood": 3.14, # Log-likelihood.

1483

"aic": 3.14, # AIC.

1484

},

1485

"timeSeriesId": "A String", # The id to indicate different time series.

1486

},

1487

],

1488

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1493

"index": 42, # Index of the iteration, 0 based.

1494

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

1495

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

1496

"learnRate": 3.14, # Learn rate used for this iteration.

1497

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1498

"clusterInfos": [ # Information about top clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1499

{ # Information about a single cluster for clustering model.

1500

"centroidId": "A String", # Centroid id.

1501

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

1502

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

1503

# to each point assigned to the cluster.

},

],

},

],

"startTime": "A String", # The start time of this training run.

1509

"trainingOptions": { # Options that were used for this training run, includes

1510

# user specified and default options that were used.

1511

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1512

"itemColumn": "A String", # Item column specified for matrix factorization models.

1513

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1514

# factorization.

1515

"numFactors": "A String", # Num factors specified for matrix factorization models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1516

"inputLabelColumns": [ # Name of input label columns in training data.

1517

"A String",

1518

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1519

"batchSize": "A String", # Batch size for dnn models.

1520

"distanceType": "A String", # Distance type for clustering models.

1521

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1522

# when kmeans_initialization_method is CUSTOM.

1523

"l2Regularization": 3.14, # L2 regularization coefficient.

1524

"dropout": 3.14, # Dropout probability for dnn models.

1525

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1526

# less than 'min_relative_progress'. Used only for iterative training

1527

# algorithms.

1528

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1529

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1530

# training algorithms.

1531

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1532

# any more (compared to min_relative_progress). Used only for iterative

1533

# training algorithms.

1534

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1535

# strategy.

1536

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

1537

# feature.

1538

# 1. When data_split_method is CUSTOM, the corresponding column should

1539

# be boolean. The rows with true value tag are eval data, and the false

1540

# are training data.

1541

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1542

# rows (from smallest to largest) in the corresponding column are used

1543

# as training data, and the rest are eval data. It respects the order

1544

# in Orderable data types:

1545

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1546

"numClusters": "A String", # Number of clusters for clustering models.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1547

"warmStart": True or False, # Whether to train a model from the last checkpoint.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1548

"hiddenUnits": [ # Hidden units for dnn models.

1549

"A String",

1550

],

1551

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

1552

"userColumn": "A String", # User column specified for matrix factorization models.

1553

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

1554

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1555

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1556

# of data will be used as training data. The format should be double.

1557

# Accurate to two decimal places.

1558

# Default value is 0.2.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1559

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1560

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1561

# overfitting for boosted tree models.

1562

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1563

# training data. Only applicable for classification models.

1564

"a_key": 3.14,

1565

},

1566

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1567

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

1568

# applicable for imported models.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1569

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

1570

# specified.

1571

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

1572

"lossType": "A String", # Type of loss function used during training run.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

},

},

],

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

1577

{ # A field or a column.

1578

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1579

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1580

# in this case the output parameter does not have this "type" field).

1581

# Examples:

1582

# INT64: {type_kind="INT64"}

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1583

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1584

# STRUCT<x STRING, y ARRAY<DATE>>:

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1585

# {type_kind="STRUCT",

1586

# struct_type={fields=[

1587

# {name="x", type={type_kind="STRING"}},

1588

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1589

# ]}}

1590

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1591

"fields": [

1592

# Object with schema name: StandardSqlField

1593

],

1594

},

1595

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1596

"typeKind": "A String", # Required. The top level type of this field.

1597

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

1598

},

1599

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1600

},

1601

],

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1602

"labelColumns": [ # Output only. Label columns that were used to train this model.

1603

# The output of the model will have a "predicted_" prefix to these columns.

1604

{ # A field or a column.

1605

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1606

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1607

# in this case the output parameter does not have this "type" field).

1608

# Examples:

1609

# INT64: {type_kind="INT64"}

1610

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1611

# STRUCT<x STRING, y ARRAY<DATE>>:

1612

# {type_kind="STRUCT",

1613

# struct_type={fields=[

1614

# {name="x", type={type_kind="STRING"}},

1615

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1616

# ]}}

1617

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1618

"fields": [

1619

# Object with schema name: StandardSqlField

1620

],

1621

},

1622

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1623

"typeKind": "A String", # Required. The top level type of this field.

1624

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

1625

},

1626

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1627

},

1628

],

1629

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1630

"modelType": "A String", # Output only. Type of the model resource.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1631

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

1632

# encryption configuration of the model data while stored in BigQuery

1633

# storage. This field can be used with PatchModel to update encryption key

1634

# for an already encrypted model.

1635

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect the destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

1636

},

1637

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1638

"projectId": "A String", # [Required] The ID of the project containing this model.

1639

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

Dan O'Meara

dd49464

2020-05-01 07:42:23 -0700

[diff] [blame]

1640

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

1641

},

1642

"etag": "A String", # Output only. A hash of this resource.

1643

"location": "A String", # Output only. The geographic location where the model resides. This value

1644

# is inherited from the dataset.