Blame - docs/dyn/bigquery_v2.models.html - platform/external/python/google-api-python-client

2020-05-20 12:08:20 -0700

[diff] [blame]

250

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

251

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

252

# models, the metrics are either macro-averaged or micro-averaged. When

253

# macro-averaged, the metrics are calculated for each label and then an

254

# unweighted average is taken of those values. When micro-averaged, the

255

# metric is calculated globally by counting the total number of correctly

256

# predicted rows.

257

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

258

# positive prediction. For multiclass this is a macro-averaged metric.

259

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

260

# classification models this is the positive class threshold.

261

# For multi-class classification models this is the confidence

262

# threshold.

263

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

264

# metric.

265

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

266

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

267

# this is a macro-averaged metric.

268

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

269

# positive actual labels. For multiclass this is a macro-averaged

270

# metric treating each class as a binary classifier.

271

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

272

# multiclass this is a micro-averaged metric.

273

},

274

"confusionMatrixList": [ # Confusion matrix at different thresholds.

275

{ # Confusion matrix for multi-class classification models.

276

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

277

# confusion matrix.

278

"rows": [ # One row per actual label.

279

{ # A single row in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

280

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

281

"entries": [ # Info describing predicted label distribution.

282

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

283

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

284

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

285

# also add an entry indicating the number of items under the

286

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

287

},

288

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

295

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

296

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

297

"clusters": [ # [Beta] Information for all clusters.

298

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

299

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

300

"count": "A String", # Count of training data rows that were assigned to this cluster.

301

"featureValues": [ # Values of highly variant features for this cluster.

302

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

303

"featureColumn": "A String", # The feature column name.

304

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

305

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

306

# more than ten categories, we return top ten (by count) and return

307

# one more CategoryCount with category "_OTHER_" and count as

308

# aggregate counts of remaining categories.

309

{ # Represents the count of a single category within the cluster.

310

"category": "A String", # The name of category.

311

"count": "A String", # The count of training samples matching the category within the

312

# cluster.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

313

},

314

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

315

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

316

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

317

# feature.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

318

},

319

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

320

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

321

],

322

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

323

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

324

"positiveLabel": "A String", # Label representing the positive class.

325

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

326

{ # Confusion matrix for binary classification models.

327

"f1Score": 3.14, # The equally weighted average of recall and precision.

328

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

329

# labels.

330

"accuracy": 3.14, # The fraction of predictions given the correct label.

331

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

332

"truePositives": "A String", # Number of true samples predicted as true.

333

"recall": 3.14, # The fraction of actual positive labels that were given a positive

334

# prediction.

335

"falseNegatives": "A String", # Number of true samples predicted as false.

336

"trueNegatives": "A String", # Number of false samples predicted as false.

337

"falsePositives": "A String", # Number of false samples predicted as true.

338

},

339

],

340

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

341

# models, the metrics are either macro-averaged or micro-averaged. When

342

# macro-averaged, the metrics are calculated for each label and then an

343

# unweighted average is taken of those values. When micro-averaged, the

344

# metric is calculated globally by counting the total number of correctly

345

# predicted rows.

346

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

347

# positive prediction. For multiclass this is a macro-averaged metric.

348

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

349

# classification models this is the positive class threshold.

350

# For multi-class classification models this is the confidence

351

# threshold.

352

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

353

# metric.

354

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

355

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

356

# this is a macro-averaged metric.

357

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

358

# positive actual labels. For multiclass this is a macro-averaged

359

# metric treating each class as a binary classifier.

360

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

361

# multiclass this is a micro-averaged metric.

362

},

363

"negativeLabel": "A String", # Label representing the negative class.

364

},

365

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

366

# factorization models.

367

# factorization models.

368

"medianAbsoluteError": 3.14, # Median absolute error.

369

"meanSquaredLogError": 3.14, # Mean squared log error.

370

"meanAbsoluteError": 3.14, # Mean absolute error.

371

"meanSquaredError": 3.14, # Mean squared error.

372

"rSquared": 3.14, # R^2 score.

373

},

374

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

375

# models.

376

# feedback_type=implicit.

377

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

378

# then averages all the precisions across all the users.

379

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

380

# predicted confidence by comparing it to an ideal rank measured by the

381

# original ratings.

382

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

383

# from the predicted confidence and dividing it by the original rank.

384

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

385

# recommendation models except instead of computing the rating directly,

386

# the output from evaluate is computed against a preference which is 1 or 0

387

# depending on if the rating exists or not.

388

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

389

},

390

"trainingOptions": { # Options that were used for this training run, includes

391

# user specified and default options that were used.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

392

"inputLabelColumns": [ # Name of input label columns in training data.

393

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

394

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

395

"warmStart": True or False, # Whether to train a model from the last checkpoint.

396

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

397

"numFactors": "A String", # Num factors specified for matrix factorization models.

398

"lossType": "A String", # Type of loss function used during training run.

399

"hiddenUnits": [ # Hidden units for dnn models.

400

"A String",

401

],

402

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

403

"l1Regularization": 3.14, # L1 regularization coefficient.

404

"distanceType": "A String", # Distance type for clustering models.

405

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

406

# specified.

407

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

408

# factorization.

409

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

410

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

411

# feature.

412

# 1. When data_split_method is CUSTOM, the corresponding column should

413

# be boolean. The rows with true value tag are eval data, and the false

414

# are training data.

415

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

416

# rows (from smallest to largest) in the corresponding column are used

417

# as training data, and the rest are eval data. It respects the order

418

# in Orderable data types:

419

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

420

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

421

# training algorithms.

422

"userColumn": "A String", # User column specified for matrix factorization models.

423

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

424

"l2Regularization": 3.14, # L2 regularization coefficient.

425

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

426

# applicable for imported models.

427

"batchSize": "A String", # Batch size for dnn models.

428

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

429

# less than 'min_relative_progress'. Used only for iterative training

430

# algorithms.

431

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

432

# when kmeans_initialization_method is CUSTOM.

433

"numClusters": "A String", # Number of clusters for clustering models.

434

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

435

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

436

"dropout": 3.14, # Dropout probability for dnn models.

437

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

438

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

439

# training data. Only applicable for classification models.

440

"a_key": 3.14,

441

},

442

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

443

# overfitting for boosted tree models.

444

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

445

# any more (compared to min_relative_progress). Used only for iterative

446

# training algorithms.

447

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

448

# of data will be used as training data. The format should be double.

449

# Accurate to two decimal places.

450

# Default value is 0.2.

451

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

452

# strategy.

453

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

454

},

455

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

456

# actually split.

457

# data tables that were used to train the model.

458

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

459

"projectId": "A String", # [Required] The ID of the project containing this table.

460

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

461

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

462

},

463

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

464

"projectId": "A String", # [Required] The ID of the project containing this table.

465

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

466

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

467

},

468

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

469

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

470

],

471

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

472

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

473

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

474

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

475

},

476

"description": "A String", # Optional. A user-friendly description of this model.

477

"etag": "A String", # Output only. A hash of this resource.

478

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

479

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

480

# encryption configuration of the model data while stored in BigQuery

481

# storage. This field can be used with PatchModel to update encryption key

482

# for an already encrypted model.

483

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

484

},

485

}</pre>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

</div>

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

489

<code class="details" id="list">list(projectId, datasetId, maxResults=None, pageToken=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

490

<pre>Lists all models in the specified dataset. Requires the READER dataset

491

role.

492

493

Args:

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

494

projectId: string, Required. Project ID of the models to list. (required)

495

datasetId: string, Required. Dataset ID of the models to list. (required)

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

496

maxResults: integer, The maximum number of results to return in a single response page.

497

Leverage the page tokens to iterate through the entire collection.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

498

pageToken: string, Page token, returned by a previous call to request the next page of

499

results.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

500

501

Returns:

502

An object of the form:

503

504

{

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

505

"nextPageToken": "A String", # A token to request the next page of results.

506

"models": [ # Models in the requested dataset. Only the following fields are populated:

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

507

# model_reference, model_type, creation_time, last_modified_time and

508

# labels.

509

{

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

510

"location": "A String", # Output only. The geographic location where the model resides. This value

511

# is inherited from the dataset.

512

"friendlyName": "A String", # Optional. A descriptive name for this model.

513

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.

514

"labels": { # The labels associated with this model. You can use these to organize

515

# and group your models. Label keys and values can be no longer

516

# than 63 characters, can only contain lowercase letters, numeric

517

# characters, underscores and dashes. International characters are allowed.

518

# Label values are optional. Label keys must start with a letter and each

519

# label in the list must have a different key.

520

"a_key": "A String",

521

},

522

"labelColumns": [ # Output only. Label columns that were used to train this model.

523

# The output of the model will have a "predicted_" prefix to these columns.

524

{ # A field or a column.

525

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

526

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

527

# specified (e.g., CREATE FUNCTION statement can omit the return type;

528

# in this case the output parameter does not have this "type" field).

529

# Examples:

530

# INT64: {type_kind="INT64"}

531

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

532

# STRUCT<x STRING, y ARRAY<DATE>>:

533

# {type_kind="STRUCT",

534

# struct_type={fields=[

535

# {name="x", type={type_kind="STRING"}},

536

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

537

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

538

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

539

"typeKind": "A String", # Required. The top level type of this field.

540

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

541

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

542

"fields": [

543

# Object with schema name: StandardSqlField

544

],

545

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

546

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

547

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

548

],

549

"modelType": "A String", # Output only. Type of the model resource.

550

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

551

{ # A field or a column.

552

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

553

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

554

# specified (e.g., CREATE FUNCTION statement can omit the return type;

555

# in this case the output parameter does not have this "type" field).

556

# Examples:

557

# INT64: {type_kind="INT64"}

558

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

559

# STRUCT<x STRING, y ARRAY<DATE>>:

560

# {type_kind="STRUCT",

561

# struct_type={fields=[

562

# {name="x", type={type_kind="STRING"}},

563

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

564

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

565

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

566

"typeKind": "A String", # Required. The top level type of this field.

567

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

568

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

569

"fields": [

570

# Object with schema name: StandardSqlField

571

],

572

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

577

# If not present, the model will persist indefinitely. Expired models

578

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

579

# property of the encapsulating dataset can be used to set a default

580

# expirationTime on newly created models.

581

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

582

{ # Information about a single training query run for the model.

583

"startTime": "A String", # The start time of this training run.

584

"results": [ # Output of each iteration run, results.size() <= max_iterations.

585

{ # Information about a single iteration of the training run.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

586

"clusterInfos": [ # Information about top clusters for clustering models.

587

{ # Information about a single cluster for clustering model.

588

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

589

# to each point assigned to the cluster.

590

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

591

"centroidId": "A String", # Centroid id.

592

},

593

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

594

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

595

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

596

"index": 42, # Index of the iteration, 0 based.

597

"learnRate": 3.14, # Learn rate used for this iteration.

598

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

599

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

600

# refactoring if we want to use model-specific iteration results.

601

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

602

# fitted in auto-arima. For non-auto-arima model, its size is one.

603

{ # Arima model information.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

604

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

605

# for one time series.

606

"A String",

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

607

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

608

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

609

"d": "A String", # Order of the differencing part.

610

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

611

"q": "A String", # Order of the moving-average part.

612

},

613

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

614

"logLikelihood": 3.14, # Log-likelihood.

615

"variance": 3.14, # Variance.

616

"aic": 3.14, # AIC.

617

},

618

"timeSeriesId": "A String", # The id to indicate different time series.

619

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

620

# when d is not 1.

621

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

622

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

623

3.14,

624

],

625

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

626

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

627

3.14,

628

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

629

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

630

},

631

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

632

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

633

# one time series.

634

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

635

],

636

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

637

},

638

],

639

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

640

# end of training.

641

# data or just the eval data based on whether eval data was used during

642

# training. These are not present for imported models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

643

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

644

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

645

# models, the metrics are either macro-averaged or micro-averaged. When

646

# macro-averaged, the metrics are calculated for each label and then an

647

# unweighted average is taken of those values. When micro-averaged, the

648

# metric is calculated globally by counting the total number of correctly

649

# predicted rows.

650

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

651

# positive prediction. For multiclass this is a macro-averaged metric.

652

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

653

# classification models this is the positive class threshold.

654

# For multi-class classification models this is the confidence

655

# threshold.

656

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

657

# metric.

658

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

659

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

660

# this is a macro-averaged metric.

661

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

662

# positive actual labels. For multiclass this is a macro-averaged

663

# metric treating each class as a binary classifier.

664

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

665

# multiclass this is a micro-averaged metric.

666

},

667

"confusionMatrixList": [ # Confusion matrix at different thresholds.

668

{ # Confusion matrix for multi-class classification models.

669

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

670

# confusion matrix.

671

"rows": [ # One row per actual label.

672

{ # A single row in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

673

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

674

"entries": [ # Info describing predicted label distribution.

675

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

676

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

677

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

678

# also add an entry indicating the number of items under the

679

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

680

},

681

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

688

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

689

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

690

"clusters": [ # [Beta] Information for all clusters.

691

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

692

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

693

"count": "A String", # Count of training data rows that were assigned to this cluster.

694

"featureValues": [ # Values of highly variant features for this cluster.

695

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

696

"featureColumn": "A String", # The feature column name.

697

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

698

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

699

# more than ten categories, we return top ten (by count) and return

700

# one more CategoryCount with category "_OTHER_" and count as

701

# aggregate counts of remaining categories.

702

{ # Represents the count of a single category within the cluster.

703

"category": "A String", # The name of category.

704

"count": "A String", # The count of training samples matching the category within the

705

# cluster.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

706

},

707

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

708

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

709

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

710

# feature.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

711

},

712

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

713

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

714

],

715

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

716

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

717

"positiveLabel": "A String", # Label representing the positive class.

718

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

719

{ # Confusion matrix for binary classification models.

720

"f1Score": 3.14, # The equally weighted average of recall and precision.

721

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

722

# labels.

723

"accuracy": 3.14, # The fraction of predictions given the correct label.

724

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

725

"truePositives": "A String", # Number of true samples predicted as true.

726

"recall": 3.14, # The fraction of actual positive labels that were given a positive

727

# prediction.

728

"falseNegatives": "A String", # Number of true samples predicted as false.

729

"trueNegatives": "A String", # Number of false samples predicted as false.

730

"falsePositives": "A String", # Number of false samples predicted as true.

731

},

732

],

733

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

734

# models, the metrics are either macro-averaged or micro-averaged. When

735

# macro-averaged, the metrics are calculated for each label and then an

736

# unweighted average is taken of those values. When micro-averaged, the

737

# metric is calculated globally by counting the total number of correctly

738

# predicted rows.

739

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

740

# positive prediction. For multiclass this is a macro-averaged metric.

741

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

742

# classification models this is the positive class threshold.

743

# For multi-class classification models this is the confidence

744

# threshold.

745

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

746

# metric.

747

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

748

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

749

# this is a macro-averaged metric.

750

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

751

# positive actual labels. For multiclass this is a macro-averaged

752

# metric treating each class as a binary classifier.

753

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

754

# multiclass this is a micro-averaged metric.

755

},

756

"negativeLabel": "A String", # Label representing the negative class.

757

},

758

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

759

# factorization models.

760

# factorization models.

761

"medianAbsoluteError": 3.14, # Median absolute error.

762

"meanSquaredLogError": 3.14, # Mean squared log error.

763

"meanAbsoluteError": 3.14, # Mean absolute error.

764

"meanSquaredError": 3.14, # Mean squared error.

765

"rSquared": 3.14, # R^2 score.

766

},

767

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

768

# models.

769

# feedback_type=implicit.

770

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

771

# then averages all the precisions across all the users.

772

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

773

# predicted confidence by comparing it to an ideal rank measured by the

774

# original ratings.

775

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

776

# from the predicted confidence and dividing it by the original rank.

777

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

778

# recommendation models except instead of computing the rating directly,

779

# the output from evaluate is computed against a preference which is 1 or 0

780

# depending on if the rating exists or not.

781

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

782

},

783

"trainingOptions": { # Options that were used for this training run, includes

784

# user specified and default options that were used.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

785

"inputLabelColumns": [ # Name of input label columns in training data.

786

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

787

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

788

"warmStart": True or False, # Whether to train a model from the last checkpoint.

789

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

790

"numFactors": "A String", # Num factors specified for matrix factorization models.

791

"lossType": "A String", # Type of loss function used during training run.

792

"hiddenUnits": [ # Hidden units for dnn models.

793

"A String",

794

],

795

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

796

"l1Regularization": 3.14, # L1 regularization coefficient.

797

"distanceType": "A String", # Distance type for clustering models.

798

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

799

# specified.

800

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

801

# factorization.

802

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

803

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

804

# feature.

805

# 1. When data_split_method is CUSTOM, the corresponding column should

806

# be boolean. The rows with true value tag are eval data, and the false

807

# are training data.

808

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

809

# rows (from smallest to largest) in the corresponding column are used

810

# as training data, and the rest are eval data. It respects the order

811

# in Orderable data types:

812

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

813

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

814

# training algorithms.

815

"userColumn": "A String", # User column specified for matrix factorization models.

816

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

817

"l2Regularization": 3.14, # L2 regularization coefficient.

818

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

819

# applicable for imported models.

820

"batchSize": "A String", # Batch size for dnn models.

821

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

822

# less than 'min_relative_progress'. Used only for iterative training

823

# algorithms.

824

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

825

# when kmeans_initialization_method is CUSTOM.

826

"numClusters": "A String", # Number of clusters for clustering models.

827

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

828

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

829

"dropout": 3.14, # Dropout probability for dnn models.

830

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

831

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

832

# training data. Only applicable for classification models.

833

"a_key": 3.14,

834

},

835

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

836

# overfitting for boosted tree models.

837

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

838

# any more (compared to min_relative_progress). Used only for iterative

839

# training algorithms.

840

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

841

# of data will be used as training data. The format should be double.

842

# Accurate to two decimal places.

843

# Default value is 0.2.

844

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

845

# strategy.

846

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

847

},

848

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

849

# actually split.

850

# data tables that were used to train the model.

851

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

852

"projectId": "A String", # [Required] The ID of the project containing this table.

853

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

854

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

855

},

856

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

857

"projectId": "A String", # [Required] The ID of the project containing this table.

858

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

859

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

860

},

861

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

862

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

863

],

864

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

865

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

866

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

867

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

868

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

869

"description": "A String", # Optional. A user-friendly description of this model.

870

"etag": "A String", # Output only. A hash of this resource.

871

"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.

872

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

873

# encryption configuration of the model data while stored in BigQuery

874

# storage. This field can be used with PatchModel to update encryption key

875

# for an already encrypted model.

876

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

877

},

878

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

879

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

}</pre>

</div>

<code class="details" id="list_next">list_next(previous_request, previous_response)</code>

885

<pre>Retrieves the next page of results.

886

887

Args:

888

previous_request: The request for the previous page. (required)

889

previous_response: The response from the request for the previous page. (required)

890

891

Returns:

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

892

A request object that you can call 'execute()' on to request the next

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

893

page. Returns None if there are no more items in the collection.

</pre>

</div>

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

898

<code class="details" id="patch">patch(projectId, datasetId, modelId, body=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

899

<pre>Patch specific fields in the specified model.

900

901

Args:

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

902

projectId: string, Required. Project ID of the model to patch. (required)

903

datasetId: string, Required. Dataset ID of the model to patch. (required)

904

modelId: string, Required. Model ID of the model to patch. (required)

905

body: object, The request body.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

906

The object takes the form of:

907

908

{

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

909

"location": "A String", # Output only. The geographic location where the model resides. This value

910

# is inherited from the dataset.

911

"friendlyName": "A String", # Optional. A descriptive name for this model.

912

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.

913

"labels": { # The labels associated with this model. You can use these to organize

914

# and group your models. Label keys and values can be no longer

915

# than 63 characters, can only contain lowercase letters, numeric

916

# characters, underscores and dashes. International characters are allowed.

917

# Label values are optional. Label keys must start with a letter and each

918

# label in the list must have a different key.

919

"a_key": "A String",

920

},

921

"labelColumns": [ # Output only. Label columns that were used to train this model.

922

# The output of the model will have a "predicted_" prefix to these columns.

923

{ # A field or a column.

924

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

925

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

926

# specified (e.g., CREATE FUNCTION statement can omit the return type;

927

# in this case the output parameter does not have this "type" field).

928

# Examples:

929

# INT64: {type_kind="INT64"}

930

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

931

# STRUCT<x STRING, y ARRAY<DATE>>:

932

# {type_kind="STRUCT",

933

# struct_type={fields=[

934

# {name="x", type={type_kind="STRING"}},

935

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

936

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

937

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

938

"typeKind": "A String", # Required. The top level type of this field.

939

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

940

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

941

"fields": [

942

# Object with schema name: StandardSqlField

943

],

944

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"modelType": "A String", # Output only. Type of the model resource.

949

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

950

{ # A field or a column.

951

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

952

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

953

# specified (e.g., CREATE FUNCTION statement can omit the return type;

954

# in this case the output parameter does not have this "type" field).

955

# Examples:

956

# INT64: {type_kind="INT64"}

957

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

958

# STRUCT<x STRING, y ARRAY<DATE>>:

959

# {type_kind="STRUCT",

960

# struct_type={fields=[

961

# {name="x", type={type_kind="STRING"}},

962

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

963

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

964

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

965

"typeKind": "A String", # Required. The top level type of this field.

966

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

967

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

968

"fields": [

969

# Object with schema name: StandardSqlField

970

],

971

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

976

# If not present, the model will persist indefinitely. Expired models

977

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

978

# property of the encapsulating dataset can be used to set a default

979

# expirationTime on newly created models.

980

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

981

{ # Information about a single training query run for the model.

982

"startTime": "A String", # The start time of this training run.

983

"results": [ # Output of each iteration run, results.size() <= max_iterations.

984

{ # Information about a single iteration of the training run.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

985

"clusterInfos": [ # Information about top clusters for clustering models.

986

{ # Information about a single cluster for clustering model.

987

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

988

# to each point assigned to the cluster.

989

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

990

"centroidId": "A String", # Centroid id.

991

},

992

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

993

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

994

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

995

"index": 42, # Index of the iteration, 0 based.

996

"learnRate": 3.14, # Learn rate used for this iteration.

997

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

998

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

999

# refactoring if we want to use model-specific iteration results.

1000

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

1001

# fitted in auto-arima. For non-auto-arima model, its size is one.

1002

{ # Arima model information.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1003

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1004

# for one time series.

1005

"A String",

1006

],

1007

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1008

"d": "A String", # Order of the differencing part.

1009

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1010

"q": "A String", # Order of the moving-average part.

1011

},

1012

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

1013

"logLikelihood": 3.14, # Log-likelihood.

1014

"variance": 3.14, # Variance.

1015

"aic": 3.14, # AIC.

1016

},

1017

"timeSeriesId": "A String", # The id to indicate different time series.

1018

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

1019

# when d is not 1.

1020

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

1021

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

1022

3.14,

1023

],

1024

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

1025

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

1026

3.14,

1027

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1036

},

1037

],

1038

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

1039

# end of training.

1040

# data or just the eval data based on whether eval data was used during

1041

# training. These are not present for imported models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1042

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1043

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1044

# models, the metrics are either macro-averaged or micro-averaged. When

1045

# macro-averaged, the metrics are calculated for each label and then an

1046

# unweighted average is taken of those values. When micro-averaged, the

1047

# metric is calculated globally by counting the total number of correctly

1048

# predicted rows.

1049

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1050

# positive prediction. For multiclass this is a macro-averaged metric.

1051

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1052

# classification models this is the positive class threshold.

1053

# For multi-class classification models this is the confidence

1054

# threshold.

1055

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1056

# metric.

1057

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1058

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1059

# this is a macro-averaged metric.

1060

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1061

# positive actual labels. For multiclass this is a macro-averaged

1062

# metric treating each class as a binary classifier.

1063

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1064

# multiclass this is a micro-averaged metric.

1065

},

1066

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1067

{ # Confusion matrix for multi-class classification models.

1068

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1069

# confusion matrix.

1070

"rows": [ # One row per actual label.

1071

{ # A single row in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1072

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1073

"entries": [ # Info describing predicted label distribution.

1074

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1075

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1076

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

1077

# also add an entry indicating the number of items under the

1078

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1079

},

1080

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

1087

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

1088

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

1089

"clusters": [ # [Beta] Information for all clusters.

1090

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1091

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1092

"count": "A String", # Count of training data rows that were assigned to this cluster.

1093

"featureValues": [ # Values of highly variant features for this cluster.

1094

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1095

"featureColumn": "A String", # The feature column name.

1096

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

1097

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

1098

# more than ten categories, we return top ten (by count) and return

1099

# one more CategoryCount with category "_OTHER_" and count as

1100

# aggregate counts of remaining categories.

1101

{ # Represents the count of a single category within the cluster.

1102

"category": "A String", # The name of category.

1103

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1108

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

1109

# feature.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1110

},

1111

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1112

},

1113

],

1114

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1115

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

1116

"positiveLabel": "A String", # Label representing the positive class.

1117

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1118

{ # Confusion matrix for binary classification models.

1119

"f1Score": 3.14, # The equally weighted average of recall and precision.

1120

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1121

# labels.

1122

"accuracy": 3.14, # The fraction of predictions given the correct label.

1123

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

1124

"truePositives": "A String", # Number of true samples predicted as true.

1125

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1126

# prediction.

1127

"falseNegatives": "A String", # Number of true samples predicted as false.

1128

"trueNegatives": "A String", # Number of false samples predicted as false.

1129

"falsePositives": "A String", # Number of false samples predicted as true.

1130

},

1131

],

1132

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1133

# models, the metrics are either macro-averaged or micro-averaged. When

1134

# macro-averaged, the metrics are calculated for each label and then an

1135

# unweighted average is taken of those values. When micro-averaged, the

1136

# metric is calculated globally by counting the total number of correctly

1137

# predicted rows.

1138

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1139

# positive prediction. For multiclass this is a macro-averaged metric.

1140

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1141

# classification models this is the positive class threshold.

1142

# For multi-class classification models this is the confidence

1143

# threshold.

1144

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1145

# metric.

1146

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1147

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1148

# this is a macro-averaged metric.

1149

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1150

# positive actual labels. For multiclass this is a macro-averaged

1151

# metric treating each class as a binary classifier.

1152

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1153

# multiclass this is a micro-averaged metric.

1154

},

1155

"negativeLabel": "A String", # Label representing the negative class.

1156

},

1157

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

1158

# factorization models.

1159

# factorization models.

1160

"medianAbsoluteError": 3.14, # Median absolute error.

1161

"meanSquaredLogError": 3.14, # Mean squared log error.

1162

"meanAbsoluteError": 3.14, # Mean absolute error.

1163

"meanSquaredError": 3.14, # Mean squared error.

1164

"rSquared": 3.14, # R^2 score.

1165

},

1166

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

1167

# models.

1168

# feedback_type=implicit.

1169

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

1170

# then averages all the precisions across all the users.

1171

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

1172

# predicted confidence by comparing it to an ideal rank measured by the

1173

# original ratings.

1174

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

1175

# from the predicted confidence and dividing it by the original rank.

1176

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

1177

# recommendation models except instead of computing the rating directly,

1178

# the output from evaluate is computed against a preference which is 1 or 0

1179

# depending on if the rating exists or not.

1180

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1181

},

1182

"trainingOptions": { # Options that were used for this training run, includes

1183

# user specified and default options that were used.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1184

"inputLabelColumns": [ # Name of input label columns in training data.

1185

"A String",

1186

],

1187

"warmStart": True or False, # Whether to train a model from the last checkpoint.

1188

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

1189

"numFactors": "A String", # Num factors specified for matrix factorization models.

1190

"lossType": "A String", # Type of loss function used during training run.

1191

"hiddenUnits": [ # Hidden units for dnn models.

1192

"A String",

1193

],

1194

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

1195

"l1Regularization": 3.14, # L1 regularization coefficient.

1196

"distanceType": "A String", # Distance type for clustering models.

1197

"walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is

1198

# specified.

1199

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1200

# factorization.

1201

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

1202

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

1203

# feature.

1204

# 1. When data_split_method is CUSTOM, the corresponding column should

1205

# be boolean. The rows with true value tag are eval data, and the false

1206

# are training data.

1207

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1208

# rows (from smallest to largest) in the corresponding column are used

1209

# as training data, and the rest are eval data. It respects the order

1210

# in Orderable data types:

1211

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

1212

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1213

# training algorithms.

1214

"userColumn": "A String", # User column specified for matrix factorization models.

1215

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

1216

"l2Regularization": 3.14, # L2 regularization coefficient.

1217

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

1218

# applicable for imported models.

1219

"batchSize": "A String", # Batch size for dnn models.

1220

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1221

# less than 'min_relative_progress'. Used only for iterative training

1222

# algorithms.

1223

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1224

# when kmeans_initialization_method is CUSTOM.

1225

"numClusters": "A String", # Number of clusters for clustering models.

1226

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1227

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1228

"dropout": 3.14, # Dropout probability for dnn models.

1229

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

1230

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1231

# training data. Only applicable for classification models.

1232

"a_key": 3.14,

1233

},

1234

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1235

# overfitting for boosted tree models.

1236

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1237

# any more (compared to min_relative_progress). Used only for iterative

1238

# training algorithms.

1239

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1240

# of data will be used as training data. The format should be double.

1241

# Accurate to two decimal places.

1242

# Default value is 0.2.

1243

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1244

# strategy.

1245

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1246

},

1247

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1248

# actually split.

1249

# data tables that were used to train the model.

1250

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1251

"projectId": "A String", # [Required] The ID of the project containing this table.

1252

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1253

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1254

},

1255

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1256

"projectId": "A String", # [Required] The ID of the project containing this table.

1257

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1258

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

},

],

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1264

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1265

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

1266

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1267

},

1268

"description": "A String", # Optional. A user-friendly description of this model.

1269

"etag": "A String", # Output only. A hash of this resource.

1270

"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.

1271

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

1272

# encryption configuration of the model data while stored in BigQuery

1273

# storage. This field can be used with PatchModel to update encryption key

1274

# for an already encrypted model.

1275

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

},

}

Returns:

An object of the form:

1282

1283

{

1284

"location": "A String", # Output only. The geographic location where the model resides. This value

1285

# is inherited from the dataset.

1286

"friendlyName": "A String", # Optional. A descriptive name for this model.

1287

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.

1288

"labels": { # The labels associated with this model. You can use these to organize

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1289

# and group your models. Label keys and values can be no longer

1290

# than 63 characters, can only contain lowercase letters, numeric

1291

# characters, underscores and dashes. International characters are allowed.

1292

# Label values are optional. Label keys must start with a letter and each

1293

# label in the list must have a different key.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1294

"a_key": "A String",

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1295

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1296

"labelColumns": [ # Output only. Label columns that were used to train this model.

1297

# The output of the model will have a "predicted_" prefix to these columns.

1298

{ # A field or a column.

1299

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1300

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1301

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1302

# in this case the output parameter does not have this "type" field).

1303

# Examples:

1304

# INT64: {type_kind="INT64"}

1305

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1306

# STRUCT<x STRING, y ARRAY<DATE>>:

1307

# {type_kind="STRUCT",

1308

# struct_type={fields=[

1309

# {name="x", type={type_kind="STRING"}},

1310

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1311

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1312

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1313

"typeKind": "A String", # Required. The top level type of this field.

1314

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1315

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1316

"fields": [

1317

# Object with schema name: StandardSqlField

1318

],

1319

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"modelType": "A String", # Output only. Type of the model resource.

1324

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

1325

{ # A field or a column.

1326

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

1327

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1328

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1329

# in this case the output parameter does not have this "type" field).

1330

# Examples:

1331

# INT64: {type_kind="INT64"}

1332

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1333

# STRUCT<x STRING, y ARRAY<DATE>>:

1334

# {type_kind="STRUCT",

1335

# struct_type={fields=[

1336

# {name="x", type={type_kind="STRING"}},

1337

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1338

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1339

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

1340

"typeKind": "A String", # Required. The top level type of this field.

1341

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1342

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1343

"fields": [

1344

# Object with schema name: StandardSqlField

1345

],

1346

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

1351

# If not present, the model will persist indefinitely. Expired models

1352

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

1353

# property of the encapsulating dataset can be used to set a default

1354

# expirationTime on newly created models.

1355

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1356

{ # Information about a single training query run for the model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1357

"startTime": "A String", # The start time of this training run.

1358

"results": [ # Output of each iteration run, results.size() <= max_iterations.

1359

{ # Information about a single iteration of the training run.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1360

"clusterInfos": [ # Information about top clusters for clustering models.

1361

{ # Information about a single cluster for clustering model.

1362

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

1363

# to each point assigned to the cluster.

1364

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

1365

"centroidId": "A String", # Centroid id.

1366

},

1367

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1368

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

1369

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

1370

"index": 42, # Index of the iteration, 0 based.

1371

"learnRate": 3.14, # Learn rate used for this iteration.

1372

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

1373

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

1374

# refactoring if we want to use model-specific iteration results.

1375

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

1376

# fitted in auto-arima. For non-auto-arima model, its size is one.

1377

{ # Arima model information.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1378

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1379

# for one time series.

1380

"A String",

1381

],

1382

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1383

"d": "A String", # Order of the differencing part.

1384

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1385

"q": "A String", # Order of the moving-average part.

1386

},

1387

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

1388

"logLikelihood": 3.14, # Log-likelihood.

1389

"variance": 3.14, # Variance.

1390

"aic": 3.14, # AIC.

1391

},

1392

"timeSeriesId": "A String", # The id to indicate different time series.

1393

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

1394

# when d is not 1.

1395

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

1396

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

1397

3.14,

1398

],

1399

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

1400

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

1401

3.14,

1402

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

},

],

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1411

},

1412

],

1413

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1414

# end of training.

1415

# data or just the eval data based on whether eval data was used during

1416

# training. These are not present for imported models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1417

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1418

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1419

# models, the metrics are either macro-averaged or micro-averaged. When

1420

# macro-averaged, the metrics are calculated for each label and then an

1421

# unweighted average is taken of those values. When micro-averaged, the

1422

# metric is calculated globally by counting the total number of correctly

1423

# predicted rows.

1424

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1425

# positive prediction. For multiclass this is a macro-averaged metric.

1426

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1427

# classification models this is the positive class threshold.

1428

# For multi-class classification models this is the confidence

1429

# threshold.

1430

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1431

# metric.

1432

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1433

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1434

# this is a macro-averaged metric.

1435

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1436

# positive actual labels. For multiclass this is a macro-averaged

1437

# metric treating each class as a binary classifier.

1438

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1439

# multiclass this is a micro-averaged metric.

1440

},

1441

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1442

{ # Confusion matrix for multi-class classification models.

1443

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1444

# confusion matrix.

1445

"rows": [ # One row per actual label.

1446

{ # A single row in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1447

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1448

"entries": [ # Info describing predicted label distribution.

1449

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1450

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1451

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

1452

# also add an entry indicating the number of items under the

1453

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1454

},

1455

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

1462

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

1463

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

1464

"clusters": [ # [Beta] Information for all clusters.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1465

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1466

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1467

"count": "A String", # Count of training data rows that were assigned to this cluster.

1468

"featureValues": [ # Values of highly variant features for this cluster.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1469

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1470

"featureColumn": "A String", # The feature column name.

1471

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

1472

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1473

# more than ten categories, we return top ten (by count) and return

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1474

# one more CategoryCount with category "_OTHER_" and count as

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1475

# aggregate counts of remaining categories.

1476

{ # Represents the count of a single category within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1477

"category": "A String", # The name of category.

1478

"count": "A String", # The count of training samples matching the category within the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

# cluster.

},

],

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1483

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

1484

# feature.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1485

},

1486

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1487

},

1488

],

1489

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1490

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

1491

"positiveLabel": "A String", # Label representing the positive class.

1492

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1493

{ # Confusion matrix for binary classification models.

1494

"f1Score": 3.14, # The equally weighted average of recall and precision.

1495

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1496

# labels.

1497

"accuracy": 3.14, # The fraction of predictions given the correct label.

1498

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metric.

1499

"truePositives": "A String", # Number of true samples predicted as true.

1500

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1501

# prediction.

1502

"falseNegatives": "A String", # Number of false samples predicted as false.

1503

"trueNegatives": "A String", # Number of true samples predicted as false.

1504

"falsePositives": "A String", # Number of false samples predicted as true.

1505

},

1506

],

1507

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1508

# models, the metrics are either macro-averaged or micro-averaged. When

1509

# macro-averaged, the metrics are calculated for each label and then an

1510

# unweighted average is taken of those values. When micro-averaged, the

1511

# metric is calculated globally by counting the total number of correctly

1512

# predicted rows.

1513

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1514

# positive prediction. For multiclass this is a macro-averaged metric.

1515

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1516

# classification models this is the positive class threshold.

1517

# For multi-class classification models this is the confidence

1518

# threshold.

1519

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1520

# metric.

1521

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1522

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1523

# this is a macro-averaged metric.

1524

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1525

# positive actual labels. For multiclass this is a macro-averaged

1526

# metric treating each class as a binary classifier.

1527

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1528

# multiclass this is a micro-averaged metric.

1529

},

1530

"negativeLabel": "A String", # Label representing the negative class.

1531

},

1532

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

1533

# factorization models.

1534

# factorization models.

1535

"medianAbsoluteError": 3.14, # Median absolute error.

1536

"meanSquaredLogError": 3.14, # Mean squared log error.

1537

"meanAbsoluteError": 3.14, # Mean absolute error.

1538

"meanSquaredError": 3.14, # Mean squared error.

1539

"rSquared": 3.14, # R^2 score.

1540

},

1541

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

1542

# models.

1543

# feedback_type=implicit.

1544

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

1545

# then averages all the precisions across all the users.

1546

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

1547

# predicted confidence by comparing it to an ideal rank measured by the

1548

# original ratings.

1549

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

1550

# from the predicted confidence and dividing it by the original rank.

1551

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

1552

# recommendation models except instead of computing the rating directly,

1553

# the output from evaluate is computed against a preference which is 1 or 0

1554

# depending on if the rating exists or not.

1555

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1556

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1557

"trainingOptions": { # Options that were used for this training run, includes

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1558

# user specified and default options that were used.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1559

"inputLabelColumns": [ # Name of input label columns in training data.

1560

"A String",

1561

],

1562

"warmStart": True or False, # Whether to train a model from the last checkpoint.

1563

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

1564

"numFactors": "A String", # Num factors specified for matrix factorization models.

1565

"lossType": "A String", # Type of loss function used during training run.

1566

"hiddenUnits": [ # Hidden units for dnn models.

1567

"A String",

1568

],

1569

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

1570

"l1Regularization": 3.14, # L1 regularization coefficient.

1571

"distanceType": "A String", # Distance type for clustering models.

1572

"walsAlpha": 3.14, # Hyperparameter for matrix factoration when implicit feedback type is

1573

# specified.

1574

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1575

# factorization.

1576

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

1577

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1578

# feature.

1579

# 1. When data_split_method is CUSTOM, the corresponding column should

1580

# be boolean. The rows with true value tag are eval data, and the false

1581

# are training data.

1582

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1583

# rows (from smallest to largest) in the corresponding column are used

1584

# as training data, and the rest are eval data. It respects the order

1585

# in Orderable data types:

1586

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1587

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1588

# training algorithms.

1589

"userColumn": "A String", # User column specified for matrix factorization models.

1590

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

1591

"l2Regularization": 3.14, # L2 regularization coefficient.

1592

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1593

# applicable for imported models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1594

"batchSize": "A String", # Batch size for dnn models.

1595

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1596

# less than 'min_relative_progress'. Used only for iterative training

1597

# algorithms.

1598

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1599

# when kmeans_initialization_method is CUSTOM.

1600

"numClusters": "A String", # Number of clusters for clustering models.

1601

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1602

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1603

"dropout": 3.14, # Dropout probability for dnn models.

1604

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

1605

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1606

# training data. Only applicable for classification models.

1607

"a_key": 3.14,

1608

},

1609

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1610

# overfitting for boosted tree models.

1611

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1612

# any more (compared to min_relative_progress). Used only for iterative

1613

# training algorithms.

1614

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1615

# of data will be used as training data. The format should be double.

1616

# Accurate to two decimal places.

1617

# Default value is 0.2.

1618

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1619

# strategy.

1620

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1621

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1622

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1623

# actually split.

1624

# data tables that were used to train the model.

1625

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1626

"projectId": "A String", # [Required] The ID of the project containing this table.

1627

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1628

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1629

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1630

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1631

"projectId": "A String", # [Required] The ID of the project containing this table.

1632

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1633

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1634

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1635

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1636

},

1637

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1638

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1639

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1640

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

1641

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1642

},

1643

"description": "A String", # Optional. A user-friendly description of this model.

1644

"etag": "A String", # Output only. A hash of this resource.

1645

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

1646

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1647

# encryption configuration of the model data while stored in BigQuery

1648

# storage. This field can be used with PatchModel to update encryption key

1649

# for an already encrypted model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1650

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect the destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1651

},

Bu Sun Kim