Blame - docs/dyn/bigquery_v2.models.html - platform/external/python/google-api-python-client

2020-07-22 17:02:09 -0700

[diff] [blame]

238

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

239

# models.

240

# feedback_type=implicit.

241

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

242

# predicted confidence by comparing it to an ideal rank measured by the

243

# original ratings.

244

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

245

# from the predicted confidence and dividing it by the original rank.

246

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

247

# recommendation models except instead of computing the rating directly,

248

# the output from evaluate is computed against a preference which is 1 or 0

249

# depending on if the rating exists or not.

250

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

251

# then averages all the precisions across all the users.

252

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

253

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

254

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

255

# models, the metrics are either macro-averaged or micro-averaged. When

256

# macro-averaged, the metrics are calculated for each label and then an

257

# unweighted average is taken of those values. When micro-averaged, the

258

# metric is calculated globally by counting the total number of correctly

259

# predicted rows.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

260

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

261

# classification models this is the positive class threshold.

262

# For multi-class classification models this is the confidence

263

# threshold.

264

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

265

# metric.

266

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

267

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

268

# this is a macro-averaged metric.

269

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

270

# positive actual labels. For multiclass this is a macro-averaged

271

# metric treating each class as a binary classifier.

272

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

273

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

274

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

275

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

276

},

277

"confusionMatrixList": [ # Confusion matrix at different thresholds.

278

{ # Confusion matrix for multi-class classification models.

279

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

280

# confusion matrix.

281

"rows": [ # One row per actual label.

282

{ # A single row in the confusion matrix.

283

"entries": [ # Info describing predicted label distribution.

284

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

285

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

286

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

287

# also add an entry indicating the number of items under the

288

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

289

},

290

],

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

291

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

298

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample to its cluster centroid.

299

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

300

"clusters": [ # [Beta] Information for all clusters.

301

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

302

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

303

"count": "A String", # Count of training data rows that were assigned to this cluster.

304

"featureValues": [ # Values of highly variant features for this cluster.

305

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

306

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

307

# feature.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

308

"featureColumn": "A String", # The feature column name.

309

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

310

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

311

# more than ten categories, we return top ten (by count) and return

312

# one more CategoryCount with category "_OTHER_" and count as

313

# aggregate counts of remaining categories.

314

{ # Represents the count of a single category within the cluster.

315

"category": "A String", # The name of category.

316

"count": "A String", # The count of training samples matching the category within the

317

# cluster.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

318

},

319

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

320

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

321

},

322

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

323

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

324

],

325

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

326

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

327

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

328

{ # Confusion matrix for binary classification models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

329

"recall": 3.14, # The fraction of actual positive labels that were given a positive

330

# prediction.

331

"falseNegatives": "A String", # Number of true samples predicted as false.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

332

"falsePositives": "A String", # Number of false samples predicted as true.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

333

"trueNegatives": "A String", # Number of false samples predicted as false.

334

"f1Score": 3.14, # The equally weighted average of recall and precision.

335

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

336

# labels.

337

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

338

"accuracy": 3.14, # The fraction of predictions given the correct label.

339

"truePositives": "A String", # Number of true samples predicted as true.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

340

},

341

],

342

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

343

# models, the metrics are either macro-averaged or micro-averaged. When

344

# macro-averaged, the metrics are calculated for each label and then an

345

# unweighted average is taken of those values. When micro-averaged, the

346

# metric is calculated globally by counting the total number of correctly

347

# predicted rows.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

348

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

349

# classification models this is the positive class threshold.

350

# For multi-class classification models this is the confidence

351

# threshold.

352

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

353

# metric.

354

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

355

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

356

# this is a macro-averaged metric.

357

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

358

# positive actual labels. For multiclass this is a macro-averaged

359

# metric treating each class as a binary classifier.

360

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

361

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

362

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

363

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

364

},

365

"negativeLabel": "A String", # Label representing the negative class.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

366

"positiveLabel": "A String", # Label representing the positive class.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

367

},

368

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

369

# factorization models.

370

# factorization models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

371

"meanSquaredError": 3.14, # Mean squared error.

372

"rSquared": 3.14, # R^2 score.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

373

"medianAbsoluteError": 3.14, # Median absolute error.

374

"meanSquaredLogError": 3.14, # Mean squared log error.

375

"meanAbsoluteError": 3.14, # Mean absolute error.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

376

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

377

},

378

"trainingOptions": { # Options that were used for this training run, includes

379

# user specified and default options that were used.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

380

"dropout": 3.14, # Dropout probability for dnn models.

381

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

382

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

383

# training data. Only applicable for classification models.

384

"a_key": 3.14,

385

},

386

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

387

# overfitting for boosted tree models.

388

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

389

# any more (compared to min_relative_progress). Used only for iterative

390

# training algorithms.

391

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

392

# of data will be used as training data. The format should be double.

393

# Accurate to two decimal places.

394

# Default value is 0.2.

395

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

396

# strategy.

397

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

398

"inputLabelColumns": [ # Name of input label columns in training data.

399

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

400

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

401

"warmStart": True or False, # Whether to train a model from the last checkpoint.

402

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

403

"numFactors": "A String", # Num factors specified for matrix factorization models.

404

"lossType": "A String", # Type of loss function used during training run.

405

"hiddenUnits": [ # Hidden units for dnn models.

406

"A String",

407

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

408

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

409

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

410

"distanceType": "A String", # Distance type for clustering models.

411

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

412

# specified.

413

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

414

# factorization.

415

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

416

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

417

# feature.

418

# 1. When data_split_method is CUSTOM, the corresponding column should

419

# be boolean. The rows with true value tag are eval data, and the false

420

# are training data.

421

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

422

# rows (from smallest to largest) in the corresponding column are used

423

# as training data, and the rest are eval data. It respects the order

424

# in Orderable data types:

425

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

426

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

427

# training algorithms.

428

"userColumn": "A String", # User column specified for matrix factorization models.

429

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

430

"preserveInputStructs": True or False, # Whether to preserve the input structs in output feature names.

431

# Suppose there is a struct A with field b.

432

# When false (default), the output feature name is A_b.

433

# When true, the output feature name is A.b.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

434

"l2Regularization": 3.14, # L2 regularization coefficient.

435

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

436

# applicable for imported models.

437

"batchSize": "A String", # Batch size for dnn models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

438

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

439

# when kmeans_initialization_method is CUSTOM.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

440

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

441

# less than 'min_relative_progress'. Used only for iterative training

442

# algorithms.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

443

"numClusters": "A String", # Number of clusters for clustering models.

444

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

445

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

446

},

447

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

448

# actually split.

449

# data tables that were used to train the model.

450

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

451

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

452

"projectId": "A String", # [Required] The ID of the project containing this table.

453

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

454

},

455

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

456

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

457

"projectId": "A String", # [Required] The ID of the project containing this table.

458

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

459

},

460

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

461

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

462

],

463

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

464

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

465

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

466

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

467

},

468

"description": "A String", # Optional. A user-friendly description of this model.

469

"etag": "A String", # Output only. A hash of this resource.

470

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

471

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

472

# encryption configuration of the model data while stored in BigQuery

473

# storage. This field can be used with PatchModel to update encryption key

474

# for an already encrypted model.

475

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

476

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

477

"location": "A String", # Output only. The geographic location where the model resides. This value

478

# is inherited from the dataset.

479

"friendlyName": "A String", # Optional. A descriptive name for this model.

480

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.

481

"labels": { # The labels associated with this model. You can use these to organize

482

# and group your models. Label keys and values can be no longer

483

# than 63 characters, can only contain lowercase letters, numeric

484

# characters, underscores and dashes. International characters are allowed.

485

# Label values are optional. Label keys must start with a letter and each

486

# label in the list must have a different key.

487

"a_key": "A String",

488

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

489

}</pre>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

</div>

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

493

<code class="details" id="list">list(projectId, datasetId, pageToken=None, maxResults=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

494

<pre>Lists all models in the specified dataset. Requires the READER dataset

495

role.

496

497

Args:

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

498

projectId: string, Required. Project ID of the models to list. (required)

499

datasetId: string, Required. Dataset ID of the models to list. (required)

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

500

pageToken: string, Page token, returned by a previous call to request the next page of

501

results

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

502

maxResults: integer, The maximum number of results to return in a single response page.

503

Leverage the page tokens to iterate through the entire collection.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

504

505

Returns:

506

An object of the form:

507

508

{

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

509

"models": [ # Models in the requested dataset. Only the following fields are populated:

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

510

# model_reference, model_type, creation_time, last_modified_time and

511

# labels.

512

{

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

513

"modelType": "A String", # Output only. Type of the model resource.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

514

"labelColumns": [ # Output only. Label columns that were used to train this model.

515

# The output of the model will have a "predicted_" prefix to these columns.

516

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

517

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

518

# specified (e.g., CREATE FUNCTION statement can omit the return type;

519

# in this case the output parameter does not have this "type" field).

520

# Examples:

521

# INT64: {type_kind="INT64"}

522

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

523

# STRUCT<x STRING, y ARRAY<DATE>>:

524

# {type_kind="STRUCT",

525

# struct_type={fields=[

526

# {name="x", type={type_kind="STRING"}},

527

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

528

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

529

"typeKind": "A String", # Required. The top level type of this field.

530

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

531

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

532

"fields": [

533

# Object with schema name: StandardSqlField

534

],

535

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

536

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

537

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

538

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

539

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

540

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

541

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

542

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

543

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

544

# specified (e.g., CREATE FUNCTION statement can omit the return type;

545

# in this case the output parameter does not have this "type" field).

546

# Examples:

547

# INT64: {type_kind="INT64"}

548

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

549

# STRUCT<x STRING, y ARRAY<DATE>>:

550

# {type_kind="STRUCT",

551

# struct_type={fields=[

552

# {name="x", type={type_kind="STRING"}},

553

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

554

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

555

"typeKind": "A String", # Required. The top level type of this field.

556

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

557

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

558

"fields": [

559

# Object with schema name: StandardSqlField

560

],

561

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

562

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

563

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

564

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

565

},

566

],

567

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

568

# If not present, the model will persist indefinitely. Expired models

569

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

570

# property of the encapsulating dataset can be used to set a default

571

# expirationTime on newly created models.

572

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

573

{ # Information about a single training query run for the model.

574

"startTime": "A String", # The start time of this training run.

575

"results": [ # Output of each iteration run, results.size() <= max_iterations.

576

{ # Information about a single iteration of the training run.

577

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

578

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

579

"index": 42, # Index of the iteration, 0 based.

580

"learnRate": 3.14, # Learn rate used for this iteration.

581

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

582

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

583

# refactoring if we want to use model-specific iteration results.

584

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

585

# fitted in auto-arima. For non-auto-arima model, its size is one.

586

{ # Arima model information.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

587

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

588

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

589

3.14,

590

],

591

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

592

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

3.14,

],

},

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

597

# when d is not 1.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

598

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

599

# for one time series.

600

"A String",

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

601

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

602

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

603

"d": "A String", # Order of the differencing part.

604

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

605

"q": "A String", # Order of the moving-average part.

606

},

607

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

608

"aic": 3.14, # AIC.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

609

"logLikelihood": 3.14, # Log-likelihood.

610

"variance": 3.14, # Variance.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

611

},

612

"timeSeriesId": "A String", # The id to indicate different time series.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

613

},

614

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

615

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

616

# one time series.

617

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

618

],

619

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

620

"clusterInfos": [ # Information about top clusters for clustering models.

621

{ # Information about a single cluster for clustering model.

622

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

623

# to each point assigned to the cluster.

624

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

625

"centroidId": "A String", # Centroid id.

626

},

627

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

628

},

629

],

630

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

631

# end of training.

632

# data or just the eval data based on whether eval data was used during

633

# training. These are not present for imported models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

634

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

635

# models.

636

# feedback_type=implicit.

637

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

638

# predicted confidence by comparing it to an ideal rank measured by the

639

# original ratings.

640

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

641

# from the predicted confidence and dividing it by the original rank.

642

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

643

# recommendation models except instead of computing the rating directly,

644

# the output from evaluate is computed against a preference which is 1 or 0

645

# depending on if the rating exists or not.

646

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

647

# then averages all the precisions across all the users.

648

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

649

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

650

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

651

# models, the metrics are either macro-averaged or micro-averaged. When

652

# macro-averaged, the metrics are calculated for each label and then an

653

# unweighted average is taken of those values. When micro-averaged, the

654

# metric is calculated globally by counting the total number of correctly

655

# predicted rows.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

656

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

657

# classification models this is the positive class threshold.

658

# For multi-class classification models this is the confidence

659

# threshold.

660

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

661

# metric.

662

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

663

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

664

# this is a macro-averaged metric.

665

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

666

# positive actual labels. For multiclass this is a macro-averaged

667

# metric treating each class as a binary classifier.

668

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

669

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

670

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

671

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

672

},

673

"confusionMatrixList": [ # Confusion matrix at different thresholds.

674

{ # Confusion matrix for multi-class classification models.

675

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

676

# confusion matrix.

677

"rows": [ # One row per actual label.

678

{ # A single row in the confusion matrix.

679

"entries": [ # Info describing predicted label distribution.

680

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

681

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

682

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

683

# also add an entry indicating the number of items under the

684

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

685

},

686

],

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

687

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

694

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample and its cluster centroid.

695

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

696

"clusters": [ # [Beta] Information for all clusters.

697

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

698

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

699

"count": "A String", # Count of training data rows that were assigned to this cluster.

700

"featureValues": [ # Values of highly variant features for this cluster.

701

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

702

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

703

# feature.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

704

"featureColumn": "A String", # The feature column name.

705

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

706

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

707

# more than ten categories, we return top ten (by count) and return

708

# one more CategoryCount with category "_OTHER_" and count as

709

# aggregate counts of remaining categories.

710

{ # Represents the count of a single category within the cluster.

711

"category": "A String", # The name of category.

712

"count": "A String", # The count of training samples matching the category within the

713

# cluster.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

714

},

715

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

716

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

717

},

718

],

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

719

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

720

],

721

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

722

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

723

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

724

{ # Confusion matrix for binary classification models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

725

"recall": 3.14, # The fraction of actual positive labels that were given a positive

726

# prediction.

727

"falseNegatives": "A String", # Number of true samples predicted as false.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

728

"falsePositives": "A String", # Number of false samples predicted as true.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

729

"trueNegatives": "A String", # Number of false samples predicted as false.

730

"f1Score": 3.14, # The equally weighted average of recall and precision.

731

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

732

# labels.

733

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

734

"accuracy": 3.14, # The fraction of predictions given the correct label.

735

"truePositives": "A String", # Number of true samples predicted as true.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

736

},

737

],

738

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

739

# models, the metrics are either macro-averaged or micro-averaged. When

740

# macro-averaged, the metrics are calculated for each label and then an

741

# unweighted average is taken of those values. When micro-averaged, the

742

# metric is calculated globally by counting the total number of correctly

743

# predicted rows.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

744

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

745

# classification models this is the positive class threshold.

746

# For multi-class classification models this is the confidence

747

# threshold.

748

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

749

# metric.

750

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

751

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

752

# this is a macro-averaged metric.

753

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

754

# positive actual labels. For multiclass this is a macro-averaged

755

# metric treating each class as a binary classifier.

756

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

757

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

758

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

759

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

760

},

761

"negativeLabel": "A String", # Label representing the negative class.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

762

"positiveLabel": "A String", # Label representing the positive class.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

763

},

764

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

765

# factorization models.

766

# factorization models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

767

"meanSquaredError": 3.14, # Mean squared error.

768

"rSquared": 3.14, # R^2 score.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

769

"medianAbsoluteError": 3.14, # Median absolute error.

770

"meanSquaredLogError": 3.14, # Mean squared log error.

771

"meanAbsoluteError": 3.14, # Mean absolute error.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

772

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

773

},

774

"trainingOptions": { # Options that were used for this training run, includes

775

# user specified and default options that were used.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

776

"dropout": 3.14, # Dropout probability for dnn models.

777

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

778

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

779

# training data. Only applicable for classification models.

780

"a_key": 3.14,

781

},

782

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

783

# overfitting for boosted tree models.

784

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

785

# any more (compared to min_relative_progress). Used only for iterative

786

# training algorithms.

787

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

788

# of data will be used as training data. The format should be double.

789

# Accurate to two decimal places.

790

# Default value is 0.2.

791

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

792

# strategy.

793

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

794

"inputLabelColumns": [ # Name of input label columns in training data.

795

"A String",

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

796

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

797

"warmStart": True or False, # Whether to train a model from the last checkpoint.

798

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

799

"numFactors": "A String", # Num factors specified for matrix factorization models.

800

"lossType": "A String", # Type of loss function used during training run.

801

"hiddenUnits": [ # Hidden units for dnn models.

802

"A String",

803

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

804

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

805

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

806

"distanceType": "A String", # Distance type for clustering models.

807

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

808

# specified.

809

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

810

# factorization.

811

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

812

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

813

# feature.

814

# 1. When data_split_method is CUSTOM, the corresponding column should

815

# be boolean. The rows with true value tag are eval data, and the false

816

# are training data.

817

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

818

# rows (from smallest to largest) in the corresponding column are used

819

# as training data, and the rest are eval data. It respects the order

820

# in Orderable data types:

821

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

822

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

823

# training algorithms.

824

"userColumn": "A String", # User column specified for matrix factorization models.

825

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

826

"preserveInputStructs": True or False, # Whether to preserve the input structs in output feature names.

827

# Suppose there is a struct A with field b.

828

# When false (default), the output feature name is A_b.

829

# When true, the output feature name is A.b.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

830

"l2Regularization": 3.14, # L2 regularization coefficient.

831

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

832

# applicable for imported models.

833

"batchSize": "A String", # Batch size for dnn models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

834

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

835

# when kmeans_initialization_method is CUSTOM.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

836

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

837

# less than 'min_relative_progress'. Used only for iterative training

838

# algorithms.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

839

"numClusters": "A String", # Number of clusters for clustering models.

840

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

841

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

842

},

843

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

844

# actually split.

845

# data tables that were used to train the model.

846

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

847

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

848

"projectId": "A String", # [Required] The ID of the project containing this table.

849

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

850

},

851

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

852

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

853

"projectId": "A String", # [Required] The ID of the project containing this table.

854

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

855

},

856

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

857

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

858

],

859

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

860

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

861

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

862

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

863

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

864

"description": "A String", # Optional. A user-friendly description of this model.

865

"etag": "A String", # Output only. A hash of this resource.

866

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

867

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

868

# encryption configuration of the model data while stored in BigQuery

869

# storage. This field can be used with PatchModel to update encryption key

870

# for an already encrypted model.

871

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

872

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

873

"location": "A String", # Output only. The geographic location where the model resides. This value

874

# is inherited from the dataset.

875

"friendlyName": "A String", # Optional. A descriptive name for this model.

876

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in milliseconds since the epoch.

877

"labels": { # The labels associated with this model. You can use these to organize

878

# and group your models. Label keys and values can be no longer

879

# than 63 characters, can only contain lowercase letters, numeric

880

# characters, underscores and dashes. International characters are allowed.

881

# Label values are optional. Label keys must start with a letter and each

882

# label in the list must have a different key.

883

"a_key": "A String",

884

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

885

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

886

],

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

887

"nextPageToken": "A String", # A token to request the next page of results.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

}</pre>

</div>

<code class="details" id="list_next">list_next(previous_request, previous_response)</code>

893

<pre>Retrieves the next page of results.

894

895

Args:

896

previous_request: The request for the previous page. (required)

897

previous_response: The response from the request for the previous page. (required)

898

899

Returns:

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

900

A request object that you can call 'execute()' on to request the next

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

901

page. Returns None if there are no more items in the collection.

</pre>

</div>

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

906

<code class="details" id="patch">patch(projectId, datasetId, modelId, body=None)</code>

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

907

<pre>Patch specific fields in the specified model.

908

909

Args:

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

910

projectId: string, Required. Project ID of the model to patch. (required)

911

datasetId: string, Required. Dataset ID of the model to patch. (required)

912

modelId: string, Required. Model ID of the model to patch. (required)

913

body: object, The request body.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

914

The object takes the form of:

915

916

{

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

917

"modelType": "A String", # Output only. Type of the model resource.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

918

"labelColumns": [ # Output only. Label columns that were used to train this model.

919

# The output of the model will have a "predicted_" prefix to these columns.

920

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

921

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

922

# specified (e.g., CREATE FUNCTION statement can omit the return type;

923

# in this case the output parameter does not have this "type" field).

924

# Examples:

925

# INT64: {type_kind="INT64"}

926

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

927

# STRUCT<x STRING, y ARRAY<DATE>>:

928

# {type_kind="STRUCT",

929

# struct_type={fields=[

930

# {name="x", type={type_kind="STRING"}},

931

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

932

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

933

"typeKind": "A String", # Required. The top level type of this field.

934

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

935

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

936

"fields": [

937

# Object with schema name: StandardSqlField

938

],

939

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

940

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

941

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

942

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

943

},

944

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

945

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

946

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

947

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

948

# specified (e.g., CREATE FUNCTION statement can omit the return type;

949

# in this case the output parameter does not have this "type" field).

950

# Examples:

951

# INT64: {type_kind="INT64"}

952

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

953

# STRUCT<x STRING, y ARRAY<DATE>>:

954

# {type_kind="STRUCT",

955

# struct_type={fields=[

956

# {name="x", type={type_kind="STRING"}},

957

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

958

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

959

"typeKind": "A String", # Required. The top level type of this field.

960

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

961

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

962

"fields": [

963

# Object with schema name: StandardSqlField

964

],

965

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

966

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

967

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

968

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

969

},

970

],

971

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

972

# If not present, the model will persist indefinitely. Expired models

973

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

974

# property of the encapsulating dataset can be used to set a default

975

# expirationTime on newly created models.

976

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

977

{ # Information about a single training query run for the model.

978

"startTime": "A String", # The start time of this training run.

979

"results": [ # Output of each iteration run, results.size() <= max_iterations.

980

{ # Information about a single iteration of the training run.

981

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

982

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

983

"index": 42, # Index of the iteration, 0 based.

984

"learnRate": 3.14, # Learn rate used for this iteration.

985

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

986

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

987

# refactoring if we want to use model-specific iteration results.

988

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

989

# fitted in auto-arima. For non-auto-arima model, its size is one.

990

{ # Arima model information.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

991

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

992

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

993

3.14,

994

],

995

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

996

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

3.14,

],

},

"hasDrift": True or False, # Whether Arima model fitted with drift or not. It is always false

1001

# when d is not 1.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1002

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1003

# for one time series.

1004

"A String",

1005

],

1006

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1007

"d": "A String", # Order of the differencing part.

1008

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1009

"q": "A String", # Order of the moving-average part.

1010

},

1011

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1012

"aic": 3.14, # AIC.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1013

"logLikelihood": 3.14, # Log-likelihood.

1014

"variance": 3.14, # Variance.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1015

},

1016

"timeSeriesId": "A String", # The id to indicate different time series.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1017

},

1018

],

1019

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1024

"clusterInfos": [ # Information about top clusters for clustering models.

1025

{ # Information about a single cluster for clustering model.

1026

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

1027

# to each point assigned to the cluster.

1028

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

1029

"centroidId": "A String", # Centroid id.

1030

},

1031

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1032

},

1033

],

1034

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

1035

# end of training.

1036

# data or just the eval data based on whether eval data was used during

1037

# training. These are not present for imported models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1038

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

1039

# models.

1040

# feedback_type=implicit.

1041

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

1042

# predicted confidence by comparing it to an ideal rank measured by the

1043

# original ratings.

1044

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

1045

# from the predicted confidence and dividing it by the original rank.

1046

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

1047

# recommendation models except instead of computing the rating directly,

1048

# the output from evaluate is computed against a preference which is 1 or 0

1049

# depending on if the rating exists or not.

1050

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

1051

# then averages all the precisions across all the users.

1052

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1053

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1054

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1055

# models, the metrics are either macro-averaged or micro-averaged. When

1056

# macro-averaged, the metrics are calculated for each label and then an

1057

# unweighted average is taken of those values. When micro-averaged, the

1058

# metric is calculated globally by counting the total number of correctly

1059

# predicted rows.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1060

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1061

# classification models this is the positive class threshold.

1062

# For multi-class classification models this is the confidence

1063

# threshold.

1064

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1065

# metric.

1066

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1067

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1068

# this is a macro-averaged metric.

1069

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1070

# positive actual labels. For multiclass this is a macro-averaged

1071

# metric treating each class as a binary classifier.

1072

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1073

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1074

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1075

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1076

},

1077

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1078

{ # Confusion matrix for multi-class classification models.

1079

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1080

# confusion matrix.

1081

"rows": [ # One row per actual label.

1082

{ # A single row in the confusion matrix.

1083

"entries": [ # Info describing predicted label distribution.

1084

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1085

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1086

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

1087

# also add an entry indicating the number of items under the

1088

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1089

},

1090

],

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1091

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

1098

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample and its cluster centroid.

1099

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

1100

"clusters": [ # [Beta] Information for all clusters.

1101

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1102

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1103

"count": "A String", # Count of training data rows that were assigned to this cluster.

1104

"featureValues": [ # Values of highly variant features for this cluster.

1105

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1106

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

1107

# feature.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1108

"featureColumn": "A String", # The feature column name.

1109

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

1110

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

1111

# more than ten categories, we return top ten (by count) and return

1112

# one more CategoryCount with category "_OTHER_" and count as

1113

# aggregate counts of remaining categories.

1114

{ # Represents the count of a single category within the cluster.

1115

"category": "A String", # The name of category.

1116

"count": "A String", # The count of training samples matching the category within the

# cluster.

},

],

},

},

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1123

},

1124

],

1125

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1126

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1127

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1128

{ # Confusion matrix for binary classification models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1129

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1130

# prediction.

1131

"falseNegatives": "A String", # Number of true samples predicted as false.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1132

"falsePositives": "A String", # Number of false samples predicted as true.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1133

"trueNegatives": "A String", # Number of false samples predicted as false.

1134

"f1Score": 3.14, # The equally weighted average of recall and precision.

1135

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1136

# labels.

1137

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

1138

"accuracy": 3.14, # The fraction of predictions given the correct label.

1139

"truePositives": "A String", # Number of true samples predicted as true.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1140

},

1141

],

1142

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1143

# models, the metrics are either macro-averaged or micro-averaged. When

1144

# macro-averaged, the metrics are calculated for each label and then an

1145

# unweighted average is taken of those values. When micro-averaged, the

1146

# metric is calculated globally by counting the total number of correctly

1147

# predicted rows.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1148

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1149

# classification models this is the positive class threshold.

1150

# For multi-class classification models this is the confidence

1151

# threshold.

1152

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1153

# metric.

1154

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1155

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1156

# this is a macro-averaged metric.

1157

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1158

# positive actual labels. For multiclass this is a macro-averaged

1159

# metric treating each class as a binary classifier.

1160

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1161

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1162

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1163

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1164

},

1165

"negativeLabel": "A String", # Label representing the negative class.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1166

"positiveLabel": "A String", # Label representing the positive class.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1167

},

1168

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

1169

# factorization models.

1170

# factorization models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1171

"meanSquaredError": 3.14, # Mean squared error.

1172

"rSquared": 3.14, # R^2 score.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1173

"medianAbsoluteError": 3.14, # Median absolute error.

1174

"meanSquaredLogError": 3.14, # Mean squared log error.

1175

"meanAbsoluteError": 3.14, # Mean absolute error.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1176

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1177

},

1178

"trainingOptions": { # Options that were used for this training run, includes

1179

# user specified and default options that were used.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1180

"dropout": 3.14, # Dropout probability for dnn models.

1181

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

1182

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1183

# training data. Only applicable for classification models.

1184

"a_key": 3.14,

1185

},

1186

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1187

# overfitting for boosted tree models.

1188

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1189

# any more (compared to min_relative_progress). Used only for iterative

1190

# training algorithms.

1191

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1192

# of data will be used as training data. The format should be double.

1193

# Accurate to two decimal places.

1194

# Default value is 0.2.

1195

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1196

# strategy.

1197

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1198

"inputLabelColumns": [ # Name of input label columns in training data.

1199

"A String",

1200

],

1201

"warmStart": True or False, # Whether to train a model from the last checkpoint.

1202

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

1203

"numFactors": "A String", # Num factors specified for matrix factorization models.

1204

"lossType": "A String", # Type of loss function used during training run.

1205

"hiddenUnits": [ # Hidden units for dnn models.

1206

"A String",

1207

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1208

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1209

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1210

"distanceType": "A String", # Distance type for clustering models.

1211

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

1212

# specified.

1213

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1214

# factorization.

1215

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

1216

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

1217

# feature.

1218

# 1. When data_split_method is CUSTOM, the corresponding column should

1219

# be boolean. The rows with true value tag are eval data, and the false

1220

# are training data.

1221

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1222

# rows (from smallest to largest) in the corresponding column are used

1223

# as training data, and the rest are eval data. It respects the order

1224

# in Orderable data types:

1225

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

1226

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1227

# training algorithms.

1228

"userColumn": "A String", # User column specified for matrix factorization models.

1229

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1230

"preserveInputStructs": True or False, # Whether to preserve the input structs in output feature names.

1231

# Suppose there is a struct A with field b.

1232

# When false (default), the output feature name is A_b.

1233

# When true, the output feature name is A.b.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1234

"l2Regularization": 3.14, # L2 regularization coefficient.

1235

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

1236

# applicable for imported models.

1237

"batchSize": "A String", # Batch size for dnn models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1238

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1239

# when kmeans_initialization_method is CUSTOM.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1240

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1241

# less than 'min_relative_progress'. Used only for iterative training

1242

# algorithms.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1243

"numClusters": "A String", # Number of clusters for clustering models.

1244

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1245

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

1246

},

1247

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1248

# actually split.

1249

# data tables that were used to train the model.

1250

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1251

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1252

"projectId": "A String", # [Required] The ID of the project containing this table.

1253

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

1254

},

1255

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1256

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1257

"projectId": "A String", # [Required] The ID of the project containing this table.

1258

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

},

},

},

],

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1264

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1265

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

1266

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1267

},

1268

"description": "A String", # Optional. A user-friendly description of this model.

1269

"etag": "A String", # Output only. A hash of this resource.

1270

"creationTime": "A String", # Output only. The time when this model was created, in millisecs since the epoch.

1271

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

1272

# encryption configuration of the model data while stored in BigQuery

1273

# storage. This field can be used with PatchModel to update encryption key

1274

# for an already encrypted model.

1275

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

1276

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1277

"location": "A String", # Output only. The geographic location where the model resides. This value

1278

# is inherited from the dataset.

1279

"friendlyName": "A String", # Optional. A descriptive name for this model.

1280

"lastModifiedTime": "A String", # Output only. The time when this model was last modified, in millisecs since the epoch.

1281

"labels": { # The labels associated with this model. You can use these to organize

1282

# and group your models. Label keys and values can be no longer

1283

# than 63 characters, can only contain lowercase letters, numeric

1284

# characters, underscores and dashes. International characters are allowed.

1285

# Label values are optional. Label keys must start with a letter and each

1286

# label in the list must have a different key.

1287

"a_key": "A String",

1288

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

}

Returns:

An object of the form:

1294

1295

{

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1296

"modelType": "A String", # Output only. Type of the model resource.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1297

"labelColumns": [ # Output only. Label columns that were used to train this model.

1298

# The output of the model will have a "predicted_" prefix to these columns.

1299

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1300

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1301

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1302

# in this case the output parameter does not have this "type" field).

1303

# Examples:

1304

# INT64: {type_kind="INT64"}

1305

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1306

# STRUCT<x STRING, y ARRAY<DATE>>:

1307

# {type_kind="STRUCT",

1308

# struct_type={fields=[

1309

# {name="x", type={type_kind="STRING"}},

1310

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1311

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1312

"typeKind": "A String", # Required. The top level type of this field.

1313

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1314

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1315

"fields": [

1316

# Object with schema name: StandardSqlField

1317

],

1318

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1319

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1320

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1321

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1322

},

1323

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1324

"featureColumns": [ # Output only. Input feature columns that were used to train this model.

1325

{ # A field or a column.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1326

"type": { # The type of a variable, e.g., a function argument. # Optional. The type of this parameter. Absent if not explicitly

1327

# specified (e.g., CREATE FUNCTION statement can omit the return type;

1328

# in this case the output parameter does not have this "type" field).

1329

# Examples:

1330

# INT64: {type_kind="INT64"}

1331

# ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}

1332

# STRUCT<x STRING, y ARRAY<DATE>>:

1333

# {type_kind="STRUCT",

1334

# struct_type={fields=[

1335

# {name="x", type={type_kind="STRING"}},

1336

# {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}

1337

# ]}}

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1338

"typeKind": "A String", # Required. The top level type of this field.

1339

# Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1340

"structType": { # The fields of this struct, in order, if type_kind = "STRUCT".

1341

"fields": [

1342

# Object with schema name: StandardSqlField

1343

],

1344

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1345

"arrayElementType": # Object with schema name: StandardSqlDataType # The type of the array's elements, if type_kind = "ARRAY".

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1346

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1347

"name": "A String", # Optional. The name of this field. Can be absent for struct fields.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1348

},

1349

],

1350

"expirationTime": "A String", # Optional. The time when this model expires, in milliseconds since the epoch.

1351

# If not present, the model will persist indefinitely. Expired models

1352

# will be deleted and their storage reclaimed. The defaultTableExpirationMs

1353

# property of the encapsulating dataset can be used to set a default

1354

# expirationTime on newly created models.

1355

"trainingRuns": [ # Output only. Information for all training runs in increasing order of start_time.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1356

{ # Information about a single training query run for the model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1357

"startTime": "A String", # The start time of this training run.

1358

"results": [ # Output of each iteration run, results.size() <= max_iterations.

1359

{ # Information about a single iteration of the training run.

1360

"trainingLoss": 3.14, # Loss computed on the training data at the end of iteration.

1361

"evalLoss": 3.14, # Loss computed on the eval data at the end of iteration.

1362

"index": 42, # Index of the iteration, 0 based.

1363

"learnRate": 3.14, # Learn rate used for this iteration.

1364

"durationMs": "A String", # Time taken to run the iteration in milliseconds.

1365

"arimaResult": { # (Auto-)arima fitting result. Wrap everything in ArimaResult for easier

1366

# refactoring if we want to use model-specific iteration results.

1367

"arimaModelInfo": [ # This message is repeated because there are multiple arima models

1368

# fitted in auto-arima. For non-auto-arima model, its size is one.

1369

{ # Arima model information.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1370

"arimaCoefficients": { # Arima coefficients. # Arima coefficients.

1371

"autoRegressiveCoefficients": [ # Auto-regressive coefficients, an array of double.

1372

3.14,

1373

],

1374

"interceptCoefficient": 3.14, # Intercept coefficient, just a double not an array.

1375

"movingAverageCoefficients": [ # Moving-average coefficients, an array of double.

3.14,

],

},

"hasDrift": True or False, # Whether the ARIMA model was fitted with drift or not. It is always false

1380

# when d is not 1.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1381

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported

1382

# for one time series.

1383

"A String",

1384

],

1385

"nonSeasonalOrder": { # Arima order, can be used for both non-seasonal and seasonal parts. # Non-seasonal order.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1386

"d": "A String", # Order of the differencing part.

1387

"p": "A String", # Order of the autoregressive part.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1388

"q": "A String", # Order of the moving-average part.

1389

},

1390

"arimaFittingMetrics": { # ARIMA model fitting metrics. # Arima fitting metrics.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1391

"aic": 3.14, # AIC.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1392

"logLikelihood": 3.14, # Log-likelihood.

1393

"variance": 3.14, # Variance.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1394

},

1395

"timeSeriesId": "A String", # The id to indicate different time series.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1396

},

1397

],

1398

"seasonalPeriods": [ # Seasonal periods. Repeated because multiple periods are supported for

# one time series.

"A String",

],

},

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1403

"clusterInfos": [ # Information about top clusters for clustering models.

1404

{ # Information about a single cluster for clustering model.

1405

"clusterRadius": 3.14, # Cluster radius, the average distance from centroid

1406

# to each point assigned to the cluster.

1407

"clusterSize": "A String", # Cluster size, the total number of points assigned to the cluster.

1408

"centroidId": "A String", # Centroid id.

1409

},

1410

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1411

},

1412

],

1413

"evaluationMetrics": { # Evaluation metrics of a model. These are either computed on all training # The evaluation metrics over training/eval data that were computed at the

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1414

# end of training.

1415

# data or just the eval data based on whether eval data was used during

1416

# training. These are not present for imported models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1417

"rankingMetrics": { # Evaluation metrics used by weighted-ALS models specified by # [Alpha] Populated for implicit feedback type matrix factorization

1418

# models.

1419

# feedback_type=implicit.

1420

"normalizedDiscountedCumulativeGain": 3.14, # A metric to determine the goodness of a ranking calculated from the

1421

# predicted confidence by comparing it to an ideal rank measured by the

1422

# original ratings.

1423

"averageRank": 3.14, # Determines the goodness of a ranking by computing the percentile rank

1424

# from the predicted confidence and dividing it by the original rank.

1425

"meanSquaredError": 3.14, # Similar to the mean squared error computed in regression and explicit

1426

# recommendation models except instead of computing the rating directly,

1427

# the output from evaluate is computed against a preference which is 1 or 0

1428

# depending on if the rating exists or not.

1429

"meanAveragePrecision": 3.14, # Calculates a precision per user for all the items by ranking them and

1430

# then averages all the precisions across all the users.

1431

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1432

"multiClassClassificationMetrics": { # Evaluation metrics for multi-class classification/classifier models. # Populated for multi-class classification/classifier models.

1433

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1434

# models, the metrics are either macro-averaged or micro-averaged. When

1435

# macro-averaged, the metrics are calculated for each label and then an

1436

# unweighted average is taken of those values. When micro-averaged, the

1437

# metric is calculated globally by counting the total number of correctly

1438

# predicted rows.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1439

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1440

# classification models this is the positive class threshold.

1441

# For multi-class classification models this is the confidence

1442

# threshold.

1443

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1444

# metric.

1445

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1446

"f1Score": 3.14, # The F1 score is an average of recall and precision. For multiclass

1447

# this is a macro-averaged metric.

1448

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1449

# positive actual labels. For multiclass this is a macro-averaged

1450

# metric treating each class as a binary classifier.

1451

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1452

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1453

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1454

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1455

},

1456

"confusionMatrixList": [ # Confusion matrix at different thresholds.

1457

{ # Confusion matrix for multi-class classification models.

1458

"confidenceThreshold": 3.14, # Confidence threshold used when computing the entries of the

1459

# confusion matrix.

1460

"rows": [ # One row per actual label.

1461

{ # A single row in the confusion matrix.

1462

"entries": [ # Info describing predicted label distribution.

1463

{ # A single entry in the confusion matrix.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1464

"itemCount": "A String", # Number of items being predicted as this label.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1465

"predictedLabel": "A String", # The predicted label. For confidence_threshold > 0, we will

1466

# also add an entry indicating the number of items under the

1467

# confidence threshold.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1468

},

1469

],

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1470

"actualLabel": "A String", # The original label of this row.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

},

],

},

],

},

"clusteringMetrics": { # Evaluation metrics for clustering models. # Populated for clustering models.

1477

"meanSquaredDistance": 3.14, # Mean of squared distances between each sample and its cluster centroid.

1478

"daviesBouldinIndex": 3.14, # Davies-Bouldin index.

1479

"clusters": [ # [Beta] Information for all clusters.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1480

{ # Message containing the information about one cluster.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1481

"centroidId": "A String", # Centroid id.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1482

"count": "A String", # Count of training data rows that were assigned to this cluster.

1483

"featureValues": [ # Values of highly variant features for this cluster.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1484

{ # Representative value of a single feature within the cluster.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1485

"numericalValue": 3.14, # The numerical feature value. This is the centroid value for this

1486

# feature.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1487

"featureColumn": "A String", # The feature column name.

1488

"categoricalValue": { # Representative value of a categorical feature. # The categorical feature value.

1489

"categoryCounts": [ # Counts of all categories for the categorical feature. If there are

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1490

# more than ten categories, we return top ten (by count) and return

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1491

# one more CategoryCount with category "_OTHER_" and count as

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1492

# aggregate counts of remaining categories.

1493

{ # Represents the count of a single category within the cluster.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1494

"category": "A String", # The name of category.

1495

"count": "A String", # The count of training samples matching the category within the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

# cluster.

},

],

},

},

],

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1502

},

1503

],

1504

},

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1505

"binaryClassificationMetrics": { # Evaluation metrics for binary classification/classifier models. # Populated for binary classification/classifier models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1506

"binaryConfusionMatrixList": [ # Binary confusion matrix at multiple thresholds.

1507

{ # Confusion matrix for binary classification models.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1508

"recall": 3.14, # The fraction of actual positive labels that were given a positive

1509

# prediction.

1510

"falseNegatives": "A String", # Number of true samples predicted as false.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1511

"falsePositives": "A String", # Number of false samples predicted as true.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1512

"trueNegatives": "A String", # Number of false samples predicted as false.

1513

"f1Score": 3.14, # The equally weighted average of recall and precision.

1514

"precision": 3.14, # The fraction of actual positive predictions that had positive actual

1515

# labels.

1516

"positiveClassThreshold": 3.14, # Threshold value used when computing each of the following metrics.

1517

"accuracy": 3.14, # The fraction of predictions given the correct label.

1518

"truePositives": "A String", # Number of true samples predicted as true.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1519

},

1520

],

1521

"aggregateClassificationMetrics": { # Aggregate metrics for classification/classifier models. For multi-class # Aggregate classification metrics.

1522

# models, the metrics are either macro-averaged or micro-averaged. When

1523

# macro-averaged, the metrics are calculated for each label and then an

1524

# unweighted average is taken of those values. When micro-averaged, the

1525

# metric is calculated globally by counting the total number of correctly

1526

# predicted rows.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1527

"threshold": 3.14, # Threshold at which the metrics are computed. For binary

1528

# classification models this is the positive class threshold.

1529

# For multi-class classification models this is the confidence

1530

# threshold.

1531

"rocAuc": 3.14, # Area Under a ROC Curve. For multiclass this is a macro-averaged

1532

# metric.

1533

"logLoss": 3.14, # Logarithmic Loss. For multiclass this is a macro-averaged metric.

1534

"f1Score": 3.14, # The F1 score is the harmonic mean of recall and precision. For multiclass

1535

# this is a macro-averaged metric.

1536

"precision": 3.14, # Precision is the fraction of actual positive predictions that had

1537

# positive actual labels. For multiclass this is a macro-averaged

1538

# metric treating each class as a binary classifier.

1539

"accuracy": 3.14, # Accuracy is the fraction of predictions given the correct label. For

1540

# multiclass this is a micro-averaged metric.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1541

"recall": 3.14, # Recall is the fraction of actual positive labels that were given a

1542

# positive prediction. For multiclass this is a macro-averaged metric.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1543

},

1544

"negativeLabel": "A String", # Label representing the negative class.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1545

"positiveLabel": "A String", # Label representing the positive class.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1546

},

1547

"regressionMetrics": { # Evaluation metrics for regression and explicit feedback type matrix # Populated for regression models and explicit feedback type matrix

1548

# factorization models.

1549

# factorization models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1550

"meanSquaredError": 3.14, # Mean squared error.

1551

"rSquared": 3.14, # R^2 score.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1552

"medianAbsoluteError": 3.14, # Median absolute error.

1553

"meanSquaredLogError": 3.14, # Mean squared log error.

1554

"meanAbsoluteError": 3.14, # Mean absolute error.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1555

},

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1556

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1557

"trainingOptions": { # Options that were used for this training run, includes

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1558

# user specified and default options that were used.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1559

"dropout": 3.14, # Dropout probability for dnn models.

1560

"learnRate": 3.14, # Learning rate in training. Used only for iterative training algorithms.

1561

"labelClassWeights": { # Weights associated with each label class, for rebalancing the

1562

# training data. Only applicable for classification models.

1563

"a_key": 3.14,

1564

},

1565

"subsample": 3.14, # Subsample fraction of the training data to grow tree to prevent

1566

# overfitting for boosted tree models.

1567

"earlyStop": True or False, # Whether to stop early when the loss doesn't improve significantly

1568

# any more (compared to min_relative_progress). Used only for iterative

1569

# training algorithms.

1570

"dataSplitEvalFraction": 3.14, # The fraction of evaluation data over the whole input data. The rest

1571

# of data will be used as training data. The format should be double.

1572

# Accurate to two decimal places.

1573

# Default value is 0.2.

1574

"initialLearnRate": 3.14, # Specifies the initial learning rate for the line search learn rate

1575

# strategy.

1576

"itemColumn": "A String", # Item column specified for matrix factorization models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1577

"inputLabelColumns": [ # Name of input label columns in training data.

1578

"A String",

1579

],

1580

"warmStart": True or False, # Whether to train a model from the last checkpoint.

1581

"learnRateStrategy": "A String", # The strategy to determine learn rate for the current iteration.

1582

"numFactors": "A String", # Num factors specified for matrix factorization models.

1583

"lossType": "A String", # Type of loss function used during training run.

1584

"hiddenUnits": [ # Hidden units for dnn models.

1585

"A String",

1586

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1587

"l1Regularization": 3.14, # L1 regularization coefficient.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1588

"kmeansInitializationMethod": "A String", # The method used to initialize the centroids for kmeans algorithm.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1589

"distanceType": "A String", # Distance type for clustering models.

1590

"walsAlpha": 3.14, # Hyperparameter for matrix factorization when implicit feedback type is

1591

# specified.

1592

"feedbackType": "A String", # Feedback type that specifies which algorithm to run for matrix

1593

# factorization.

1594

"optimizationStrategy": "A String", # Optimization strategy for training linear regression models.

1595

"dataSplitColumn": "A String", # The column to split data with. This column won't be used as a

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1596

# feature.

1597

# 1. When data_split_method is CUSTOM, the corresponding column should

1598

# be boolean. The rows with true value tag are eval data, and the false

1599

# are training data.

1600

# 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION

1601

# rows (from smallest to largest) in the corresponding column are used

1602

# as training data, and the rest are eval data. It respects the order

1603

# in Orderable data types:

1604

# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1605

"maxIterations": "A String", # The maximum number of iterations in training. Used only for iterative

1606

# training algorithms.

1607

"userColumn": "A String", # User column specified for matrix factorization models.

1608

"maxTreeDepth": "A String", # Maximum depth of a tree for boosted tree models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1609

"preserveInputStructs": True or False, # Whether to preserve the input structs in output feature names.

1610

# Suppose there is a struct A with field b.

1611

# When false (default), the output feature name is A_b.

1612

# When true, the output feature name is A.b.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1613

"l2Regularization": 3.14, # L2 regularization coefficient.

1614

"modelUri": "A String", # [Beta] Google Cloud Storage URI from which the model was imported. Only

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1615

# applicable for imported models.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1616

"batchSize": "A String", # Batch size for dnn models.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1617

"kmeansInitializationColumn": "A String", # The column used to provide the initial centroids for kmeans algorithm

1618

# when kmeans_initialization_method is CUSTOM.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1619

"minRelativeProgress": 3.14, # When early_stop is true, stops training when accuracy improvement is

1620

# less than 'min_relative_progress'. Used only for iterative training

1621

# algorithms.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1622

"numClusters": "A String", # Number of clusters for clustering models.

1623

"dataSplitMethod": "A String", # The data split type for training and evaluation, e.g. RANDOM.

1624

"minSplitLoss": 3.14, # Minimum split loss for boosted tree models.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1625

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1626

"dataSplitResult": { # Data split result. This contains references to the training and evaluation # Data split result of the training run. Only set when the input data is

1627

# actually split.

1628

# data tables that were used to train the model.

1629

"trainingTable": { # Table reference of the training data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1630

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1631

"projectId": "A String", # [Required] The ID of the project containing this table.

1632

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Bu Sun Kim

2019-06-14 16:50:42 -0700

[diff] [blame]

1633

},

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1634

"evaluationTable": { # Table reference of the evaluation data after split.

Bu Sun Kim

2020-07-22 17:02:09 -0700

[diff] [blame]

1635

"tableId": "A String", # [Required] The ID of the table. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1636

"projectId": "A String", # [Required] The ID of the project containing this table.

1637

"datasetId": "A String", # [Required] The ID of the dataset containing this table.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1638

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1639

},

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1640

},

1641

],

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1642

"modelReference": { # Required. Unique identifier for this model.

Bu Sun Kim

2020-05-27 12:20:54 -0700

[diff] [blame]

1643

"projectId": "A String", # [Required] The ID of the project containing this model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1644

"datasetId": "A String", # [Required] The ID of the dataset containing this model.

1645

"modelId": "A String", # [Required] The ID of the model. The ID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum length is 1,024 characters.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1646

},

1647

"description": "A String", # Optional. A user-friendly description of this model.

1648

"etag": "A String", # Output only. A hash of this resource.

1649

"creationTime": "A String", # Output only. The time when this model was created, in milliseconds since the epoch.

1650

"encryptionConfiguration": { # Custom encryption configuration (e.g., Cloud KMS keys). This shows the

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1651

# encryption configuration of the model data while stored in BigQuery

1652

# storage. This field can be used with PatchModel to update encryption key

1653

# for an already encrypted model.

Bu Sun Kim

2020-05-20 12:08:20 -0700

[diff] [blame]

1654

"kmsKeyName": "A String", # [Optional] Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key.

Dan O'Meara

2020-05-01 07:42:23 -0700

[diff] [blame]

1655

},

Bu Sun Kim