blob: f70a5ea74e33b093f8b2788dd623d1233eec589b [file] [log] [blame]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070016"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
Kevin Cheng5c124ec2018-05-16 13:28:51 -070028# pylint: disable=too-many-lines
Kevin Cheng86d43c72018-08-30 10:59:14 -070029import collections
Fang Dengcef4b112017-03-02 11:20:17 -080030import copy
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070031import functools
32import logging
33import os
34
Sam Chiu7de3b232018-12-06 19:45:52 +080035from acloud import errors
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070036from acloud.internal.lib import base_cloud_client
37from acloud.internal.lib import utils
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070038
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Retry count handed to utils.RetryOnException for fingerprint conflicts
# (HTTP 412, see _IsFingerPrintError).
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10

# String constants for metadata-related keys. Their users live outside this
# section of the file.
_METADATA = "metadata"
_ITEMS = "items"
_METADATA_KEY = "key"
_METADATA_KEY_VALUE = "value"
_SSH_KEYS_NAME = "sshKeys"

# Baseline disk arguments; "initializeParams" starts out empty.
BASE_DISK_ARGS = {
    "type": "PERSISTENT",
    "boot": True,
    "mode": "READ_WRITE",
    "autoDelete": True,
    "initializeParams": {},
}

# Pair of (external, internal) addresses.
IP = collections.namedtuple("IP", ("external", "internal"))
57
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070058
class OperationScope(object):
    """Enum-like holder for the scopes an operation can belong to."""
    GLOBAL = "global"
    REGION = "region"
    ZONE = "zone"
64
65
class PersistentDiskType(object):
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    SSD = "pd-ssd"
    STANDARD = "pd-standard"
74
75
class ImageStatus(object):
    """Enum-like holder for the status values of an image."""
    READY = "READY"
    PENDING = "PENDING"
    FAILED = "FAILED"
81
82
def _IsFingerPrintError(exc):
    """Tell whether |exc| is an HTTP error carrying status code 412.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only when |exc| is an errors.HttpError whose code is
        412 ("Precondition Failed").
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
93
94
Kevin Cheng5c124ec2018-05-16 13:28:51 -070095# pylint: disable=too-many-public-methods
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070096class ComputeClient(base_cloud_client.BaseCloudApiClient):
97 """Client that manages GCE."""
98
99 # API settings, used by BaseCloudApiClient.
100 API_NAME = "compute"
101 API_VERSION = "v1"
herbertxue308f7662018-05-18 03:25:58 +0000102 SCOPE = " ".join([
103 "https://www.googleapis.com/auth/compute",
104 "https://www.googleapis.com/auth/devstorage.read_write"
105 ])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700106 # Default settings for gce operations
107 DEFAULT_INSTANCE_SCOPE = [
108 "https://www.googleapis.com/auth/devstorage.read_only",
109 "https://www.googleapis.com/auth/logging.write"
110 ]
Kevin Chengb5963882018-05-09 00:06:27 -0700111 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins
112 OPERATION_POLL_INTERVAL_SECS = 20
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700113 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
Fang Dengcef4b112017-03-02 11:20:17 -0800114 ACCESS_DENIED_CODE = 403
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700115
116 def __init__(self, acloud_config, oauth2_credentials):
117 """Initialize.
118
119 Args:
120 acloud_config: An AcloudConfig object.
121 oauth2_credentials: An oauth2client.OAuth2Credentials instance.
122 """
123 super(ComputeClient, self).__init__(oauth2_credentials)
124 self._project = acloud_config.project
125
126 def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
127 """Get status of an operation.
128
129 Args:
130 operation: An Operation resource in the format of json.
131 operation_scope: A value from OperationScope, "zone", "region",
132 or "global".
133 scope_name: If operation_scope is "zone" or "region", this should be
134 the name of the zone or region, e.g. "us-central1-f".
135
136 Returns:
137 Status of the operation, one of "DONE", "PENDING", "RUNNING".
138
139 Raises:
140 errors.DriverError: if the operation fails.
141 """
142 operation_name = operation["name"]
143 if operation_scope == OperationScope.GLOBAL:
herbertxue308f7662018-05-18 03:25:58 +0000144 api = self.service.globalOperations().get(
145 project=self._project, operation=operation_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700146 result = self.Execute(api)
147 elif operation_scope == OperationScope.ZONE:
herbertxue308f7662018-05-18 03:25:58 +0000148 api = self.service.zoneOperations().get(
149 project=self._project,
150 operation=operation_name,
151 zone=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700152 result = self.Execute(api)
153 elif operation_scope == OperationScope.REGION:
herbertxue308f7662018-05-18 03:25:58 +0000154 api = self.service.regionOperations().get(
155 project=self._project,
156 operation=operation_name,
157 region=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700158 result = self.Execute(api)
159
160 if result.get("error"):
161 errors_list = result["error"]["errors"]
herbertxue308f7662018-05-18 03:25:58 +0000162 raise errors.DriverError(
163 "Get operation state failed, errors: %s" % str(errors_list))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700164 return result["status"]
165
166 def WaitOnOperation(self, operation, operation_scope, scope_name=None):
167 """Wait for an operation to finish.
168
169 Args:
170 operation: An Operation resource in the format of json.
171 operation_scope: A value from OperationScope, "zone", "region",
172 or "global".
173 scope_name: If operation_scope is "zone" or "region", this should be
174 the name of the zone or region, e.g. "us-central1-f".
175 """
176 timeout_exception = errors.GceOperationTimeoutError(
177 "Operation hits timeout, did not complete within %d secs." %
178 self.OPERATION_TIMEOUT_SECS)
179 utils.PollAndWait(
180 func=self._GetOperationStatus,
181 expected_return="DONE",
182 timeout_exception=timeout_exception,
183 timeout_secs=self.OPERATION_TIMEOUT_SECS,
184 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
185 operation=operation,
186 operation_scope=operation_scope,
187 scope_name=scope_name)
188
189 def GetProject(self):
190 """Get project information.
191
192 Returns:
193 A project resource in json.
194 """
195 api = self.service.projects().get(project=self._project)
196 return self.Execute(api)
197
198 def GetDisk(self, disk_name, zone):
199 """Get disk information.
200
201 Args:
202 disk_name: A string.
203 zone: String, name of zone.
204
205 Returns:
206 An disk resource in json.
207 https://cloud.google.com/compute/docs/reference/latest/disks#resource
208 """
herbertxue308f7662018-05-18 03:25:58 +0000209 api = self.service.disks().get(
210 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700211 return self.Execute(api)
212
213 def CheckDiskExists(self, disk_name, zone):
214 """Check if disk exists.
215
216 Args:
217 disk_name: A string
218 zone: String, name of zone.
219
220 Returns:
221 True if disk exists, otherwise False.
222 """
223 try:
224 self.GetDisk(disk_name, zone)
225 exists = True
226 except errors.ResourceNotFoundError:
227 exists = False
228 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
229 exists)
230 return exists
231
herbertxue308f7662018-05-18 03:25:58 +0000232 def CreateDisk(self,
233 disk_name,
234 source_image,
235 size_gb,
236 zone,
237 source_project=None,
238 disk_type=PersistentDiskType.STANDARD):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700239 """Create a gce disk.
240
241 Args:
herbertxue308f7662018-05-18 03:25:58 +0000242 disk_name: String
243 source_image: String, name of the image.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700244 size_gb: Integer, size in gb.
herbertxue308f7662018-05-18 03:25:58 +0000245 zone: String, name of the zone, e.g. us-central1-b.
Kevin Chengb5963882018-05-09 00:06:27 -0700246 source_project: String, required if the image is located in a different
247 project.
248 disk_type: String, a value from PersistentDiskType, STANDARD
249 for regular hard disk or SSD for solid state disk.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700250 """
Kevin Chengb5963882018-05-09 00:06:27 -0700251 source_project = source_project or self._project
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700252 source_image = "projects/%s/global/images/%s" % (
Kevin Chengb5963882018-05-09 00:06:27 -0700253 source_project, source_image) if source_image else None
254 logger.info("Creating disk %s, size_gb: %d, source_image: %s",
255 disk_name, size_gb, str(source_image))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700256 body = {
257 "name": disk_name,
258 "sizeGb": size_gb,
herbertxue308f7662018-05-18 03:25:58 +0000259 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
260 disk_type),
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700261 }
herbertxue308f7662018-05-18 03:25:58 +0000262 api = self.service.disks().insert(
263 project=self._project,
264 sourceImage=source_image,
265 zone=zone,
266 body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700267 operation = self.Execute(api)
268 try:
herbertxue308f7662018-05-18 03:25:58 +0000269 self.WaitOnOperation(
270 operation=operation,
271 operation_scope=OperationScope.ZONE,
272 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700273 except errors.DriverError:
274 logger.error("Creating disk failed, cleaning up: %s", disk_name)
275 if self.CheckDiskExists(disk_name, zone):
276 self.DeleteDisk(disk_name, zone)
277 raise
278 logger.info("Disk %s has been created.", disk_name)
279
280 def DeleteDisk(self, disk_name, zone):
281 """Delete a gce disk.
282
283 Args:
284 disk_name: A string, name of disk.
285 zone: A string, name of zone.
286 """
287 logger.info("Deleting disk %s", disk_name)
herbertxue308f7662018-05-18 03:25:58 +0000288 api = self.service.disks().delete(
289 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700290 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000291 self.WaitOnOperation(
292 operation=operation,
293 operation_scope=OperationScope.ZONE,
294 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700295 logger.info("Deleted disk %s", disk_name)
296
297 def DeleteDisks(self, disk_names, zone):
298 """Delete multiple disks.
299
300 Args:
301 disk_names: A list of disk names.
302 zone: A string, name of zone.
303
304 Returns:
305 A tuple, (deleted, failed, error_msgs)
306 deleted: A list of names of disks that have been deleted.
307 failed: A list of names of disks that we fail to delete.
308 error_msgs: A list of failure messages.
309 """
310 if not disk_names:
311 logger.warn("Nothing to delete. Arg disk_names is not provided.")
312 return [], [], []
313 # Batch send deletion requests.
314 logger.info("Deleting disks: %s", disk_names)
315 delete_requests = {}
316 for disk_name in set(disk_names):
herbertxue308f7662018-05-18 03:25:58 +0000317 request = self.service.disks().delete(
318 project=self._project, disk=disk_name, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700319 delete_requests[disk_name] = request
herbertxue308f7662018-05-18 03:25:58 +0000320 return self._BatchExecuteAndWait(
321 delete_requests, OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700322
323 def ListDisks(self, zone, disk_filter=None):
324 """List disks.
325
326 Args:
327 zone: A string, representing zone name. e.g. "us-central1-f"
328 disk_filter: A string representing a filter in format of
329 FIELD_NAME COMPARISON_STRING LITERAL_STRING
330 e.g. "name ne example-instance"
331 e.g. "name eq "example-instance-[0-9]+""
332
333 Returns:
334 A list of disks.
335 """
herbertxue308f7662018-05-18 03:25:58 +0000336 return self.ListWithMultiPages(
337 api_resource=self.service.disks().list,
338 project=self._project,
339 zone=zone,
340 filter=disk_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700341
herbertxue308f7662018-05-18 03:25:58 +0000342 def CreateImage(self,
343 image_name,
344 source_uri=None,
345 source_disk=None,
Kevin Chengb5963882018-05-09 00:06:27 -0700346 labels=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700347 """Create a Gce image.
348
349 Args:
herbertxue308f7662018-05-18 03:25:58 +0000350 image_name: String, name of image
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700351 source_uri: Full Google Cloud Storage URL where the disk image is
Kevin Chengb5963882018-05-09 00:06:27 -0700352 stored. e.g. "https://storage.googleapis.com/my-bucket/
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700353 avd-system-2243663.tar.gz"
Kevin Chengb5963882018-05-09 00:06:27 -0700354 source_disk: String, this should be the disk's selfLink value
355 (including zone and project), rather than the disk_name
356 e.g. https://www.googleapis.com/compute/v1/projects/
357 google.com:android-builds-project/zones/
358 us-east1-d/disks/<disk_name>
359 labels: Dict, will be added to the image's labels.
360
Kevin Chengb5963882018-05-09 00:06:27 -0700361 Raises:
362 errors.DriverError: For malformed request or response.
363 errors.GceOperationTimeoutError: Operation takes too long to finish.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700364 """
herbertxue308f7662018-05-18 03:25:58 +0000365 if self.CheckImageExists(image_name):
366 return
367 if (source_uri and source_disk) or (not source_uri
368 and not source_disk):
Kevin Chengb5963882018-05-09 00:06:27 -0700369 raise errors.DriverError(
370 "Creating image %s requires either source_uri %s or "
herbertxue308f7662018-05-18 03:25:58 +0000371 "source_disk %s but not both" % (image_name, source_uri,
372 source_disk))
Kevin Chengb5963882018-05-09 00:06:27 -0700373 elif source_uri:
herbertxue308f7662018-05-18 03:25:58 +0000374 logger.info("Creating image %s, source_uri %s", image_name,
375 source_uri)
Kevin Chengb5963882018-05-09 00:06:27 -0700376 body = {
377 "name": image_name,
378 "rawDisk": {
379 "source": source_uri,
380 },
381 }
382 else:
herbertxue308f7662018-05-18 03:25:58 +0000383 logger.info("Creating image %s, source_disk %s", image_name,
384 source_disk)
Kevin Chengb5963882018-05-09 00:06:27 -0700385 body = {
386 "name": image_name,
387 "sourceDisk": source_disk,
388 }
389 if labels is not None:
390 body["labels"] = labels
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700391 api = self.service.images().insert(project=self._project, body=body)
392 operation = self.Execute(api)
393 try:
herbertxue308f7662018-05-18 03:25:58 +0000394 self.WaitOnOperation(
395 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700396 except errors.DriverError:
397 logger.error("Creating image failed, cleaning up: %s", image_name)
398 if self.CheckImageExists(image_name):
399 self.DeleteImage(image_name)
400 raise
401 logger.info("Image %s has been created.", image_name)
402
Kevin Chengb5963882018-05-09 00:06:27 -0700403 @utils.RetryOnException(_IsFingerPrintError,
404 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
405 def SetImageLabels(self, image_name, new_labels):
406 """Update image's labels. Retry for finger print conflict.
407
408 Note: Decorator RetryOnException will retry the call for FingerPrint
409 conflict (HTTP error code 412). The fingerprint is used to detect
410 conflicts of GCE resource updates. The fingerprint is initially generated
411 by Compute Engine and changes after every request to modify or update
412 resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
413 updates).
414
415 Args:
416 image_name: A string, the image name.
417 new_labels: Dict, will be added to the image's labels.
418
419 Returns:
420 A GlobalOperation resouce.
421 https://cloud.google.com/compute/docs/reference/latest/globalOperations
422 """
423 image = self.GetImage(image_name)
424 labels = image.get("labels", {})
425 labels.update(new_labels)
426 body = {
427 "labels": labels,
428 "labelFingerprint": image["labelFingerprint"]
429 }
herbertxue308f7662018-05-18 03:25:58 +0000430 api = self.service.images().setLabels(
431 project=self._project, resource=image_name, body=body)
Kevin Chengb5963882018-05-09 00:06:27 -0700432 return self.Execute(api)
433
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700434 def CheckImageExists(self, image_name):
435 """Check if image exists.
436
437 Args:
438 image_name: A string
439
440 Returns:
441 True if image exists, otherwise False.
442 """
443 try:
444 self.GetImage(image_name)
445 exists = True
446 except errors.ResourceNotFoundError:
447 exists = False
448 logger.debug("CheckImageExists: image_name: %s, result: %s",
449 image_name, exists)
450 return exists
451
Kevin Chengb5963882018-05-09 00:06:27 -0700452 def GetImage(self, image_name, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700453 """Get image information.
454
455 Args:
456 image_name: A string
Kevin Chengb5963882018-05-09 00:06:27 -0700457 image_project: A string
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700458
459 Returns:
460 An image resource in json.
461 https://cloud.google.com/compute/docs/reference/latest/images#resource
462 """
herbertxue308f7662018-05-18 03:25:58 +0000463 api = self.service.images().get(
464 project=image_project or self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700465 return self.Execute(api)
466
467 def DeleteImage(self, image_name):
468 """Delete an image.
469
470 Args:
471 image_name: A string
472 """
473 logger.info("Deleting image %s", image_name)
herbertxue308f7662018-05-18 03:25:58 +0000474 api = self.service.images().delete(
475 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700476 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000477 self.WaitOnOperation(
478 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700479 logger.info("Deleted image %s", image_name)
480
481 def DeleteImages(self, image_names):
482 """Delete multiple images.
483
484 Args:
485 image_names: A list of image names.
486
487 Returns:
488 A tuple, (deleted, failed, error_msgs)
489 deleted: A list of names of images that have been deleted.
490 failed: A list of names of images that we fail to delete.
491 error_msgs: A list of failure messages.
492 """
493 if not image_names:
494 return [], [], []
495 # Batch send deletion requests.
496 logger.info("Deleting images: %s", image_names)
497 delete_requests = {}
498 for image_name in set(image_names):
herbertxue308f7662018-05-18 03:25:58 +0000499 request = self.service.images().delete(
500 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700501 delete_requests[image_name] = request
502 return self._BatchExecuteAndWait(delete_requests,
503 OperationScope.GLOBAL)
504
Kevin Chengb5963882018-05-09 00:06:27 -0700505 def ListImages(self, image_filter=None, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700506 """List images.
507
508 Args:
509 image_filter: A string representing a filter in format of
510 FIELD_NAME COMPARISON_STRING LITERAL_STRING
511 e.g. "name ne example-image"
512 e.g. "name eq "example-image-[0-9]+""
Kevin Chengb5963882018-05-09 00:06:27 -0700513 image_project: String. If not provided, will list images from the default
514 project. Otherwise, will list images from the given
515 project, which can be any arbitrary project where the
516 account has read access
517 (i.e. has the role "roles/compute.imageUser")
518
519 Read more about image sharing across project:
520 https://cloud.google.com/compute/docs/images/sharing-images-across-projects
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700521
522 Returns:
523 A list of images.
524 """
herbertxue308f7662018-05-18 03:25:58 +0000525 return self.ListWithMultiPages(
526 api_resource=self.service.images().list,
527 project=image_project or self._project,
528 filter=image_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700529
530 def GetInstance(self, instance, zone):
531 """Get information about an instance.
532
533 Args:
534 instance: A string, representing instance name.
535 zone: A string, representing zone name. e.g. "us-central1-f"
536
537 Returns:
538 An instance resource in json.
539 https://cloud.google.com/compute/docs/reference/latest/instances#resource
540 """
herbertxue308f7662018-05-18 03:25:58 +0000541 api = self.service.instances().get(
542 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700543 return self.Execute(api)
544
Kevin Chengb5963882018-05-09 00:06:27 -0700545 def AttachAccelerator(self, instance, zone, accelerator_count,
546 accelerator_type):
547 """Attach a GPU accelerator to the instance.
548
549 Note: In order for this to succeed the following must hold:
550 - The machine schedule must be set to "terminate" i.e:
551 SetScheduling(self, instance, zone, on_host_maintenance="terminate")
552 must have been called.
553 - The machine is not starting or running. i.e.
554 StopInstance(self, instance) must have been called.
555
556 Args:
557 instance: A string, representing instance name.
558 zone: String, name of zone.
559 accelerator_count: The number accelerators to be attached to the instance.
560 a value of 0 will detach all accelerators.
561 accelerator_type: The type of accelerator to attach. e.g.
562 "nvidia-tesla-k80"
563 """
564 body = {
565 "guestAccelerators": [{
herbertxue308f7662018-05-18 03:25:58 +0000566 "acceleratorType":
567 self.GetAcceleratorUrl(accelerator_type, zone),
568 "acceleratorCount":
569 accelerator_count
Kevin Chengb5963882018-05-09 00:06:27 -0700570 }]
571 }
572 api = self.service.instances().setMachineResources(
573 project=self._project, zone=zone, instance=instance, body=body)
574 operation = self.Execute(api)
575 try:
576 self.WaitOnOperation(
577 operation=operation,
578 operation_scope=OperationScope.ZONE,
579 scope_name=zone)
580 except errors.GceOperationTimeoutError:
581 logger.error("Attach instance failed: %s", instance)
582 raise
herbertxue308f7662018-05-18 03:25:58 +0000583 logger.info("%d x %s have been attached to instance %s.",
584 accelerator_count, accelerator_type, instance)
Kevin Chengb5963882018-05-09 00:06:27 -0700585
586 def AttachDisk(self, instance, zone, **kwargs):
587 """Attach the external disk to the instance.
588
589 Args:
590 instance: A string, representing instance name.
591 zone: String, name of zone.
592 **kwargs: The attachDisk request body. See "https://cloud.google.com/
593 compute/docs/reference/latest/instances/attachDisk" for detail.
594 {
595 "kind": "compute#attachedDisk",
596 "type": string,
597 "mode": string,
598 "source": string,
599 "deviceName": string,
600 "index": integer,
601 "boot": boolean,
602 "initializeParams": {
603 "diskName": string,
604 "sourceImage": string,
605 "diskSizeGb": long,
606 "diskType": string,
607 "sourceImageEncryptionKey": {
608 "rawKey": string,
609 "sha256": string
610 }
611 },
612 "autoDelete": boolean,
613 "licenses": [
614 string
615 ],
616 "interface": string,
617 "diskEncryptionKey": {
618 "rawKey": string,
619 "sha256": string
620 }
621 }
622
623 Returns:
624 An disk resource in json.
625 https://cloud.google.com/compute/docs/reference/latest/disks#resource
626
627
628 Raises:
629 errors.GceOperationTimeoutError: Operation takes too long to finish.
630 """
631 api = self.service.instances().attachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000632 project=self._project, zone=zone, instance=instance, body=kwargs)
Kevin Chengb5963882018-05-09 00:06:27 -0700633 operation = self.Execute(api)
634 try:
635 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000636 operation=operation,
637 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700638 scope_name=zone)
639 except errors.GceOperationTimeoutError:
640 logger.error("Attach instance failed: %s", instance)
641 raise
642 logger.info("Disk has been attached to instance %s.", instance)
643
644 def DetachDisk(self, instance, zone, disk_name):
645 """Attach the external disk to the instance.
646
647 Args:
648 instance: A string, representing instance name.
649 zone: String, name of zone.
650 disk_name: A string, the name of the detach disk.
651
652 Returns:
653 A ZoneOperation resource.
654 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
655
656 Raises:
657 errors.GceOperationTimeoutError: Operation takes too long to finish.
658 """
659 api = self.service.instances().detachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000660 project=self._project,
661 zone=zone,
662 instance=instance,
Kevin Chengb5963882018-05-09 00:06:27 -0700663 deviceName=disk_name)
664 operation = self.Execute(api)
665 try:
666 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000667 operation=operation,
668 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700669 scope_name=zone)
670 except errors.GceOperationTimeoutError:
671 logger.error("Detach instance failed: %s", instance)
672 raise
673 logger.info("Disk has been detached to instance %s.", instance)
674
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700675 def StartInstance(self, instance, zone):
676 """Start |instance| in |zone|.
677
678 Args:
679 instance: A string, representing instance name.
680 zone: A string, representing zone name. e.g. "us-central1-f"
681
682 Raises:
683 errors.GceOperationTimeoutError: Operation takes too long to finish.
684 """
herbertxue308f7662018-05-18 03:25:58 +0000685 api = self.service.instances().start(
686 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700687 operation = self.Execute(api)
688 try:
herbertxue308f7662018-05-18 03:25:58 +0000689 self.WaitOnOperation(
690 operation=operation,
691 operation_scope=OperationScope.ZONE,
692 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700693 except errors.GceOperationTimeoutError:
694 logger.error("Start instance failed: %s", instance)
695 raise
696 logger.info("Instance %s has been started.", instance)
697
698 def StartInstances(self, instances, zone):
699 """Start |instances| in |zone|.
700
701 Args:
702 instances: A list of strings, representing instance names's list.
703 zone: A string, representing zone name. e.g. "us-central1-f"
704
705 Returns:
706 A tuple, (done, failed, error_msgs)
707 done: A list of string, representing the names of instances that
708 have been executed.
709 failed: A list of string, representing the names of instances that
710 we failed to execute.
711 error_msgs: A list of string, representing the failure messages.
712 """
herbertxue308f7662018-05-18 03:25:58 +0000713 action = functools.partial(
714 self.service.instances().start, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700715 return self._BatchExecuteOnInstances(instances, zone, action)
716
717 def StopInstance(self, instance, zone):
718 """Stop |instance| in |zone|.
719
720 Args:
721 instance: A string, representing instance name.
722 zone: A string, representing zone name. e.g. "us-central1-f"
723
724 Raises:
725 errors.GceOperationTimeoutError: Operation takes too long to finish.
726 """
herbertxue308f7662018-05-18 03:25:58 +0000727 api = self.service.instances().stop(
728 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700729 operation = self.Execute(api)
730 try:
herbertxue308f7662018-05-18 03:25:58 +0000731 self.WaitOnOperation(
732 operation=operation,
733 operation_scope=OperationScope.ZONE,
734 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700735 except errors.GceOperationTimeoutError:
736 logger.error("Stop instance failed: %s", instance)
737 raise
738 logger.info("Instance %s has been terminated.", instance)
739
740 def StopInstances(self, instances, zone):
741 """Stop |instances| in |zone|.
742
743 Args:
Kevin Chengb5963882018-05-09 00:06:27 -0700744 instances: A list of strings, representing instance names's list.
745 zone: A string, representing zone name. e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700746
747 Returns:
748 A tuple, (done, failed, error_msgs)
749 done: A list of string, representing the names of instances that
750 have been executed.
751 failed: A list of string, representing the names of instances that
752 we failed to execute.
753 error_msgs: A list of string, representing the failure messages.
754 """
herbertxue308f7662018-05-18 03:25:58 +0000755 action = functools.partial(
756 self.service.instances().stop, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700757 return self._BatchExecuteOnInstances(instances, zone, action)
758
759 def SetScheduling(self,
760 instance,
761 zone,
762 automatic_restart=True,
763 on_host_maintenance="MIGRATE"):
764 """Update scheduling config |automatic_restart| and |on_host_maintenance|.
765
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700766 Args:
767 instance: A string, representing instance name.
768 zone: A string, representing zone name. e.g. "us-central1-f".
769 automatic_restart: Boolean, determine whether the instance will
770 automatically restart if it crashes or not,
771 default to True.
Kevin Chengb5963882018-05-09 00:06:27 -0700772 on_host_maintenance: enum["MIGRATE", "TERMINATE"]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700773 The instance's maintenance behavior, which
774 determines whether the instance is live
Kevin Chengb5963882018-05-09 00:06:27 -0700775 "MIGRATE" or "TERMINATE" when there is
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700776 a maintenance event.
777
778 Raises:
779 errors.GceOperationTimeoutError: Operation takes too long to finish.
780 """
herbertxue308f7662018-05-18 03:25:58 +0000781 body = {
782 "automaticRestart": automatic_restart,
783 "onHostMaintenance": on_host_maintenance
784 }
785 api = self.service.instances().setScheduling(
786 project=self._project, zone=zone, instance=instance, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700787 operation = self.Execute(api)
788 try:
herbertxue308f7662018-05-18 03:25:58 +0000789 self.WaitOnOperation(
790 operation=operation,
791 operation_scope=OperationScope.ZONE,
792 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700793 except errors.GceOperationTimeoutError:
794 logger.error("Set instance scheduling failed: %s", instance)
795 raise
herbertxue308f7662018-05-18 03:25:58 +0000796 logger.info(
797 "Instance scheduling changed:\n"
798 " automaticRestart: %s\n"
799 " onHostMaintenance: %s\n",
800 str(automatic_restart).lower(), on_host_maintenance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700801
802 def ListInstances(self, zone, instance_filter=None):
803 """List instances.
804
805 Args:
806 zone: A string, representing zone name. e.g. "us-central1-f"
807 instance_filter: A string representing a filter in format of
808 FIELD_NAME COMPARISON_STRING LITERAL_STRING
809 e.g. "name ne example-instance"
810 e.g. "name eq "example-instance-[0-9]+""
811
812 Returns:
813 A list of instances.
814 """
815 return self.ListWithMultiPages(
816 api_resource=self.service.instances().list,
817 project=self._project,
818 zone=zone,
819 filter=instance_filter)
820
821 def SetSchedulingInstances(self,
822 instances,
823 zone,
824 automatic_restart=True,
825 on_host_maintenance="MIGRATE"):
826 """Update scheduling config |automatic_restart| and |on_host_maintenance|.
827
828 See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.
829
830 Args:
831 instances: A list of string, representing instance names.
832 zone: A string, representing zone name. e.g. "us-central1-f".
833 automatic_restart: Boolean, determine whether the instance will
834 automatically restart if it crashes or not,
835 default to True.
Kevin Chengb5963882018-05-09 00:06:27 -0700836 on_host_maintenance: enum["MIGRATE", "TERMINATE"]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700837 The instance's maintenance behavior, which
838 determines whether the instance is live
Kevin Chengb5963882018-05-09 00:06:27 -0700839 migrated or terminated when there is
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700840 a maintenance event.
841
842 Returns:
843 A tuple, (done, failed, error_msgs)
844 done: A list of string, representing the names of instances that
845 have been executed.
846 failed: A list of string, representing the names of instances that
847 we failed to execute.
848 error_msgs: A list of string, representing the failure messages.
849 """
herbertxue308f7662018-05-18 03:25:58 +0000850 body = {
851 "automaticRestart": automatic_restart,
852 "OnHostMaintenance": on_host_maintenance
853 }
854 action = functools.partial(
855 self.service.instances().setScheduling,
856 project=self._project,
857 zone=zone,
858 body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700859 return self._BatchExecuteOnInstances(instances, zone, action)
860
861 def _BatchExecuteOnInstances(self, instances, zone, action):
862 """Batch processing operations requiring computing time.
863
864 Args:
865 instances: A list of instance names.
866 zone: A string, e.g. "us-central1-f".
867 action: partial func, all kwargs for this gcloud action has been
868 defined in the caller function (e.g. See "StartInstances")
869 except 'instance' which will be defined by iterating the
870 |instances|.
871
872 Returns:
873 A tuple, (done, failed, error_msgs)
874 done: A list of string, representing the names of instances that
875 have been executed.
876 failed: A list of string, representing the names of instances that
877 we failed to execute.
878 error_msgs: A list of string, representing the failure messages.
879 """
880 if not instances:
881 return [], [], []
882 # Batch send requests.
883 logger.info("Batch executing instances: %s", instances)
884 requests = {}
885 for instance_name in set(instances):
886 requests[instance_name] = action(instance=instance_name)
herbertxue308f7662018-05-18 03:25:58 +0000887 return self._BatchExecuteAndWait(
888 requests, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700889
890 def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
891 """Batch processing requests and wait on the operation.
892
893 Args:
Kevin Chengb5963882018-05-09 00:06:27 -0700894 requests: A dictionary. The key is a string representing the resource
895 name. For example, an instance name, or an image name.
896 operation_scope: A value from OperationScope, "zone", "region",
897 or "global".
898 scope_name: If operation_scope is "zone" or "region", this should be
899 the name of the zone or region, e.g. "us-central1-f".
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700900 Returns:
Kevin Chengb5963882018-05-09 00:06:27 -0700901 A tuple, (done, failed, error_msgs)
902 done: A list of string, representing the resource names that have
903 been executed.
904 failed: A list of string, representing resource names that
905 we failed to execute.
906 error_msgs: A list of string, representing the failure messages.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700907 """
908 results = self.BatchExecute(requests)
909 # Initialize return values
910 failed = []
911 error_msgs = []
912 for resource_name, (_, error) in results.iteritems():
913 if error is not None:
914 failed.append(resource_name)
915 error_msgs.append(str(error))
916 done = []
917 # Wait for the executing operations to finish.
918 logger.info("Waiting for executing operations")
919 for resource_name in requests.iterkeys():
920 operation, _ = results[resource_name]
921 if operation:
922 try:
923 self.WaitOnOperation(operation, operation_scope,
924 scope_name)
925 done.append(resource_name)
926 except errors.DriverError as exc:
927 failed.append(resource_name)
928 error_msgs.append(str(exc))
929 return done, failed, error_msgs
930
931 def ListZones(self):
932 """List all zone instances in the project.
933
934 Returns:
935 Gcompute response instance. For example:
936 {
937 "id": "projects/google.com%3Aandroid-build-staging/zones",
938 "kind": "compute#zoneList",
939 "selfLink": "https://www.googleapis.com/compute/v1/projects/"
940 "google.com:android-build-staging/zones"
941 "items": [
942 {
943 'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
944 'description': 'asia-east1-c',
945 'id': '2222',
946 'kind': 'compute#zone',
947 'name': 'asia-east1-c',
948 'region': 'https://www.googleapis.com/compute/v1/projects/'
949 'google.com:android-build-staging/regions/asia-east1',
950 'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
951 'google.com:android-build-staging/zones/asia-east1-c',
952 'status': 'UP'
953 }, {
954 'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
955 'description': 'asia-east1-b',
956 'id': '2221',
957 'kind': 'compute#zone',
958 'name': 'asia-east1-b',
959 'region': 'https://www.googleapis.com/compute/v1/projects/'
960 'google.com:android-build-staging/regions/asia-east1',
961 'selfLink': 'https://www.googleapis.com/compute/v1/projects'
962 '/google.com:android-build-staging/zones/asia-east1-b',
963 'status': 'UP'
964 }]
965 }
966 See cloud cluster's api/mixer_zones.proto
967 """
968 api = self.service.zones().list(project=self._project)
969 return self.Execute(api)
970
Kevin Chengb5963882018-05-09 00:06:27 -0700971 def ListRegions(self):
972 """List all the regions for a project.
973
974 Returns:
975 A dictionary containing all the zones and additional data. See this link
976 for the detailed response:
977 https://cloud.google.com/compute/docs/reference/latest/regions/list.
978 Example:
979 {
980 'items': [{
981 'name':
982 'us-central1',
983 'quotas': [{
984 'usage': 2.0,
985 'limit': 24.0,
986 'metric': 'CPUS'
987 }, {
988 'usage': 1.0,
989 'limit': 23.0,
990 'metric': 'IN_USE_ADDRESSES'
991 }, {
992 'usage': 209.0,
993 'limit': 10240.0,
994 'metric': 'DISKS_TOTAL_GB'
995 }, {
996 'usage': 1000.0,
997 'limit': 20000.0,
998 'metric': 'INSTANCES'
999 }]
1000 },..]
1001 }
1002 """
1003 api = self.service.regions().list(project=self._project)
1004 return self.Execute(api)
1005
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001006 def _GetNetworkArgs(self, network, zone):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001007 """Helper to generate network args that is used to create an instance.
1008
1009 Args:
1010 network: A string, e.g. "default".
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001011 zone: String, representing zone name, e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001012
1013 Returns:
1014 A dictionary representing network args.
1015 """
1016 return {
1017 "network": self.GetNetworkUrl(network),
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001018 "subnetwork": self.GetSubnetworkUrl(network, zone),
herbertxue308f7662018-05-18 03:25:58 +00001019 "accessConfigs": [{
1020 "name": "External NAT",
1021 "type": "ONE_TO_ONE_NAT"
1022 }]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001023 }
1024
herbertxue308f7662018-05-18 03:25:58 +00001025 def _GetDiskArgs(self,
1026 disk_name,
1027 image_name,
1028 image_project=None,
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001029 disk_size_gb=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001030 """Helper to generate disk args that is used to create an instance.
1031
1032 Args:
1033 disk_name: A string
1034 image_name: A string
Kevin Chengb5963882018-05-09 00:06:27 -07001035 image_project: A string
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001036 disk_size_gb: An integer
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001037
1038 Returns:
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001039 List holding dict of disk args.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001040 """
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001041 args = copy.deepcopy(BASE_DISK_ARGS)
1042 args["initializeParams"] = {
1043 "diskName": disk_name,
herbertxue308f7662018-05-18 03:25:58 +00001044 "sourceImage": self.GetImage(image_name,
1045 image_project)["selfLink"],
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001046 }
1047 # TODO: Remove this check once it's validated that we can either pass in
1048 # a None diskSizeGb or we find an appropriate default val.
1049 if disk_size_gb:
1050 args["diskSizeGb"] = disk_size_gb
1051 return [args]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001052
herbertxue308f7662018-05-18 03:25:58 +00001053 def _GetExtraDiskArgs(self, extra_disk_name, zone):
1054 """Get extra disk arg for given disk.
1055
1056 Args:
1057 extra_disk_name: String, name of the disk.
1058 zone: String, representing zone name, e.g. "us-central1-f"
1059
1060 Returns:
1061 A dictionary of disk args.
1062 """
1063 return [{
1064 "type": "PERSISTENT",
1065 "mode": "READ_WRITE",
1066 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1067 extra_disk_name),
1068 "autoDelete": True,
1069 "boot": False,
1070 "interface": "SCSI",
1071 "deviceName": extra_disk_name,
1072 }]
1073
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001074 # pylint: disable=too-many-locals
herbertxue308f7662018-05-18 03:25:58 +00001075 def CreateInstance(self,
1076 instance,
1077 image_name,
1078 machine_type,
1079 metadata,
1080 network,
1081 zone,
1082 disk_args=None,
1083 image_project=None,
1084 gpu=None,
Sam Chiu56c58892018-10-25 09:53:19 +08001085 extra_disk_name=None,
1086 labels=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001087 """Create a gce instance with a gce image.
1088
1089 Args:
herbertxue308f7662018-05-18 03:25:58 +00001090 instance: String, instance name.
1091 image_name: String, source image used to create this disk.
1092 machine_type: String, representing machine_type,
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001093 e.g. "n1-standard-1"
herbertxue308f7662018-05-18 03:25:58 +00001094 metadata: Dict, maps a metadata name to its value.
1095 network: String, representing network name, e.g. "default"
1096 zone: String, representing zone name, e.g. "us-central1-f"
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001097 disk_args: A list of extra disk args (strings), see _GetDiskArgs
1098 for example, if None, will create a disk using the given
1099 image.
herbertxue308f7662018-05-18 03:25:58 +00001100 image_project: String, name of the project where the image
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001101 belongs. Assume the default project if None.
herbertxue308f7662018-05-18 03:25:58 +00001102 gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001103 None no gpus will be attached. For more details see:
Kevin Chengb5963882018-05-09 00:06:27 -07001104 https://cloud.google.com/compute/docs/gpus/add-gpus
herbertxue308f7662018-05-18 03:25:58 +00001105 extra_disk_name: String,the name of the extra disk to attach.
Sam Chiu56c58892018-10-25 09:53:19 +08001106 labels: Dict, will be added to the instance's labels.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001107 """
herbertxue308f7662018-05-18 03:25:58 +00001108 disk_args = (disk_args
1109 or self._GetDiskArgs(instance, image_name, image_project))
1110 if extra_disk_name:
1111 disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001112 body = {
1113 "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
1114 "name": instance,
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001115 "networkInterfaces": [self._GetNetworkArgs(network, zone)],
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001116 "disks": disk_args,
herbertxue308f7662018-05-18 03:25:58 +00001117 "serviceAccounts": [{
1118 "email": "default",
1119 "scopes": self.DEFAULT_INSTANCE_SCOPE
1120 }],
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001121 }
1122
Sam Chiu56c58892018-10-25 09:53:19 +08001123 if labels is not None:
1124 body["labels"] = labels
Kevin Chengb5963882018-05-09 00:06:27 -07001125 if gpu:
1126 body["guestAccelerators"] = [{
1127 "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
1128 "acceleratorCount": 1
1129 }]
1130 # Instances with GPUs cannot live migrate because they are assigned
1131 # to specific hardware devices.
1132 body["scheduling"] = {"onHostMaintenance": "terminate"}
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001133 if metadata:
herbertxue308f7662018-05-18 03:25:58 +00001134 metadata_list = [{
cylan64af2db2019-01-17 15:13:59 +08001135 _METADATA_KEY: key,
1136 _METADATA_KEY_VALUE: val
herbertxue308f7662018-05-18 03:25:58 +00001137 } for key, val in metadata.iteritems()]
cylan64af2db2019-01-17 15:13:59 +08001138 body[_METADATA] = {_ITEMS: metadata_list}
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001139 logger.info("Creating instance: project %s, zone %s, body:%s",
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001140 self._project, zone, body)
herbertxue308f7662018-05-18 03:25:58 +00001141 api = self.service.instances().insert(
1142 project=self._project, zone=zone, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001143 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001144 self.WaitOnOperation(
1145 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001146 logger.info("Instance %s has been created.", instance)
1147
1148 def DeleteInstance(self, instance, zone):
1149 """Delete a gce instance.
1150
1151 Args:
Kevin Chengb5963882018-05-09 00:06:27 -07001152 instance: A string, instance name.
1153 zone: A string, e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001154 """
1155 logger.info("Deleting instance: %s", instance)
herbertxue308f7662018-05-18 03:25:58 +00001156 api = self.service.instances().delete(
1157 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001158 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001159 self.WaitOnOperation(
1160 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001161 logger.info("Deleted instance: %s", instance)
1162
1163 def DeleteInstances(self, instances, zone):
1164 """Delete multiple instances.
1165
1166 Args:
1167 instances: A list of instance names.
1168 zone: A string, e.g. "us-central1-f".
1169
1170 Returns:
1171 A tuple, (deleted, failed, error_msgs)
1172 deleted: A list of names of instances that have been deleted.
1173 failed: A list of names of instances that we fail to delete.
1174 error_msgs: A list of failure messages.
1175 """
herbertxue308f7662018-05-18 03:25:58 +00001176 action = functools.partial(
1177 self.service.instances().delete, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001178 return self._BatchExecuteOnInstances(instances, zone, action)
1179
1180 def ResetInstance(self, instance, zone):
1181 """Reset the gce instance.
1182
1183 Args:
1184 instance: A string, instance name.
1185 zone: A string, e.g. "us-central1-f".
1186 """
1187 logger.info("Resetting instance: %s", instance)
herbertxue308f7662018-05-18 03:25:58 +00001188 api = self.service.instances().reset(
1189 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001190 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001191 self.WaitOnOperation(
1192 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001193 logger.info("Instance has been reset: %s", instance)
1194
1195 def GetMachineType(self, machine_type, zone):
1196 """Get URL for a given machine typle.
1197
1198 Args:
1199 machine_type: A string, name of the machine type.
1200 zone: A string, e.g. "us-central1-f"
1201
1202 Returns:
1203 A machine type resource in json.
1204 https://cloud.google.com/compute/docs/reference/latest/
1205 machineTypes#resource
1206 """
herbertxue308f7662018-05-18 03:25:58 +00001207 api = self.service.machineTypes().get(
1208 project=self._project, zone=zone, machineType=machine_type)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001209 return self.Execute(api)
1210
Kevin Chengb5963882018-05-09 00:06:27 -07001211 def GetAcceleratorUrl(self, accelerator_type, zone):
1212 """Get URL for a given type of accelator.
1213
1214 Args:
1215 accelerator_type: A string, representing the accelerator, e.g
1216 "nvidia-tesla-k80"
1217 zone: A string representing a zone, e.g. "us-west1-b"
1218
1219 Returns:
1220 A URL that points to the accelerator resource, e.g.
1221 https://www.googleapis.com/compute/v1/projects/<project id>/zones/
1222 us-west1-b/acceleratorTypes/nvidia-tesla-k80
1223 """
herbertxue308f7662018-05-18 03:25:58 +00001224 api = self.service.acceleratorTypes().get(
1225 project=self._project, zone=zone, acceleratorType=accelerator_type)
Kevin Chengb5963882018-05-09 00:06:27 -07001226 result = self.Execute(api)
1227 return result["selfLink"]
1228
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001229 def GetNetworkUrl(self, network):
1230 """Get URL for a given network.
1231
1232 Args:
1233 network: A string, representing network name, e.g "default"
1234
1235 Returns:
1236 A URL that points to the network resource, e.g.
1237 https://www.googleapis.com/compute/v1/projects/<project id>/
1238 global/networks/default
1239 """
herbertxue308f7662018-05-18 03:25:58 +00001240 api = self.service.networks().get(
1241 project=self._project, network=network)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001242 result = self.Execute(api)
1243 return result["selfLink"]
1244
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001245 def GetSubnetworkUrl(self, network, zone):
1246 """Get URL for a given network and zone.
1247
1248 Return the subnetwork for the network in the specified region that the
1249 specified zone resides in. If there is no subnetwork for the specified
1250 zone, raise an exception.
1251
1252 Args:
1253 network: A string, representing network name, e.g "default"
1254 zone: String, representing zone name, e.g. "us-central1-f"
1255
1256 Returns:
1257 A URL that points to the network resource, e.g.
1258 https://www.googleapis.com/compute/v1/projects/<project id>/
1259 global/networks/default
1260
1261 Raises:
1262 errors.NoSubnetwork: When no subnetwork exists for the zone
1263 specified.
1264 """
1265 api = self.service.networks().get(
1266 project=self._project, network=network)
1267 result = self.Execute(api)
1268 region = zone.rsplit("-", 1)[0]
1269 for subnetwork in result["subnetworks"]:
1270 if region in subnetwork:
1271 return subnetwork
1272 raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
1273 (network, region))
1274
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001275 def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
1276 """Compare the size of two machine types.
1277
1278 Args:
1279 machine_type_1: A string representing a machine type, e.g. n1-standard-1
1280 machine_type_2: A string representing a machine type, e.g. n1-standard-1
1281 zone: A string representing a zone, e.g. "us-central1-f"
1282
1283 Returns:
Kevin Cheng4ae42772018-10-02 11:39:48 -07001284 -1 if any metric of machine size of the first type is smaller than
1285 the second type.
1286 0 if all metrics of machine size are equal.
1287 1 if at least one metric of machine size of the first type is
1288 greater than the second type and all metrics of first type are
1289 greater or equal to the second type.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001290
1291 Raises:
1292 errors.DriverError: For malformed response.
1293 """
1294 machine_info_1 = self.GetMachineType(machine_type_1, zone)
1295 machine_info_2 = self.GetMachineType(machine_type_2, zone)
Kevin Cheng4ae42772018-10-02 11:39:48 -07001296 result = 0
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001297 for metric in self.MACHINE_SIZE_METRICS:
1298 if metric not in machine_info_1 or metric not in machine_info_2:
1299 raise errors.DriverError(
1300 "Malformed machine size record: Can't find '%s' in %s or %s"
1301 % (metric, machine_info_1, machine_info_2))
Kevin Cheng4ae42772018-10-02 11:39:48 -07001302 cmp_result = machine_info_1[metric] - machine_info_2[metric]
1303 if cmp_result < 0:
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001304 return -1
Kevin Cheng4ae42772018-10-02 11:39:48 -07001305 elif cmp_result > 0:
1306 result = 1
1307 return result
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001308
1309 def GetSerialPortOutput(self, instance, zone, port=1):
1310 """Get serial port output.
1311
1312 Args:
1313 instance: string, instance name.
1314 zone: string, zone name.
1315 port: int, which COM port to read from, 1-4, default to 1.
1316
1317 Returns:
1318 String, contents of the output.
1319
1320 Raises:
1321 errors.DriverError: For malformed response.
1322 """
1323 api = self.service.instances().getSerialPortOutput(
herbertxue308f7662018-05-18 03:25:58 +00001324 project=self._project, zone=zone, instance=instance, port=port)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001325 result = self.Execute(api)
1326 if "contents" not in result:
1327 raise errors.DriverError(
1328 "Malformed response for GetSerialPortOutput: %s" % result)
1329 return result["contents"]
1330
1331 def GetInstanceNamesByIPs(self, ips, zone):
1332 """Get Instance names by IPs.
1333
1334 This function will go through all instances, which
1335 could be slow if there are too many instances. However, currently
1336 GCE doesn't support search for instance by IP.
1337
1338 Args:
1339 ips: A set of IPs.
1340 zone: String, name of the zone.
1341
1342 Returns:
1343 A dictionary where key is IP and value is instance name or None
1344 if instance is not found for the given IP.
1345 """
1346 ip_name_map = dict.fromkeys(ips)
1347 for instance in self.ListInstances(zone):
1348 try:
1349 ip = instance["networkInterfaces"][0]["accessConfigs"][0][
1350 "natIP"]
1351 if ip in ips:
1352 ip_name_map[ip] = instance["name"]
1353 except (IndexError, KeyError) as e:
1354 logger.error("Could not get instance names by ips: %s", str(e))
1355 return ip_name_map
1356
1357 def GetInstanceIP(self, instance, zone):
1358 """Get Instance IP given instance name.
1359
1360 Args:
Kevin Chengb5963882018-05-09 00:06:27 -07001361 instance: String, representing instance name.
1362 zone: String, name of the zone.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001363
1364 Returns:
Kevin Cheng86d43c72018-08-30 10:59:14 -07001365 NamedTuple of (internal, external) IP of the instance.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001366 """
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001367 instance = self.GetInstance(instance, zone)
Kevin Cheng86d43c72018-08-30 10:59:14 -07001368 internal_ip = instance["networkInterfaces"][0]["networkIP"]
1369 external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"]
1370 return IP(internal=internal_ip, external=external_ip)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001371
cylan64af2db2019-01-17 15:13:59 +08001372 @utils.TimeExecute(function_description="Updating instance metadata: ")
1373 def SetInstanceMetadata(self, zone, instance, body):
1374 """Set instance metadata.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001375
1376 Args:
cylan64af2db2019-01-17 15:13:59 +08001377 zone: String, name of zone.
1378 instance: String, representing instance name.
1379 body: Dict, Metadata body.
Kevin Chengb5963882018-05-09 00:06:27 -07001380 metdata is in the following format.
1381 {
1382 "kind": "compute#metadata",
1383 "fingerprint": "a-23icsyx4E=",
1384 "items": [
1385 {
cylan64af2db2019-01-17 15:13:59 +08001386 "key": "sshKeys",
1387 "value": "key"
Kevin Chengb5963882018-05-09 00:06:27 -07001388 }, ...
1389 ]
1390 }
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001391 """
cylan64af2db2019-01-17 15:13:59 +08001392 api = self.service.instances().setMetadata(
1393 project=self._project, zone=zone, instance=instance, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001394 operation = self.Execute(api)
cylan64af2db2019-01-17 15:13:59 +08001395 self.WaitOnOperation(
1396 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001397
cylan64af2db2019-01-17 15:13:59 +08001398 def AddSshRsaInstanceMetadata(self, zone, user, ssh_rsa_path, instance):
1399 """Add the public rsa key to the instance's metadata.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001400
cylan64af2db2019-01-17 15:13:59 +08001401 Confirm that the instance has this public key in the instance's
1402 metadata, if not we will add this public key.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001403
1404 Args:
cylan64af2db2019-01-17 15:13:59 +08001405 zone: String, name of zone.
1406 user: String, name of the user which the key belongs to.
1407 ssh_rsa_path: String, The absolute path to public rsa key.
1408 instance: String, representing instance name.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001409 """
cylan64af2db2019-01-17 15:13:59 +08001410 ssh_rsa_path = os.path.expanduser(ssh_rsa_path)
1411 rsa = GetRsaKey(ssh_rsa_path)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001412 entry = "%s:%s" % (user, rsa)
1413 logger.debug("New RSA entry: %s", entry)
cylan64af2db2019-01-17 15:13:59 +08001414
1415 gce_instance = self.GetInstance(instance, zone)
1416 metadata = gce_instance.get(_METADATA)
1417 if RsaNotInMetadata(metadata, entry):
1418 self.UpdateRsaInMetadata(zone, instance, metadata, entry)
Fang Dengcef4b112017-03-02 11:20:17 -08001419
1420 def CheckAccess(self):
1421 """Check if the user has read access to the cloud project.
1422
1423 Returns:
1424 True if the user has at least read access to the project.
1425 False otherwise.
1426
1427 Raises:
1428 errors.HttpError if other unexpected error happens when
1429 accessing the project.
1430 """
1431 api = self.service.zones().list(project=self._project)
1432 retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
1433 retry_http_codes.remove(self.ACCESS_DENIED_CODE)
1434 try:
1435 self.Execute(api, retry_http_codes=retry_http_codes)
1436 except errors.HttpError as e:
1437 if e.code == self.ACCESS_DENIED_CODE:
1438 return False
1439 raise
1440 return True
cylan64af2db2019-01-17 15:13:59 +08001441
1442 def UpdateRsaInMetadata(self, zone, instance, metadata, entry):
1443 """Update ssh public key to sshKeys's value in this metadata.
1444
1445 Args:
1446 zone: String, name of zone.
1447 instance: String, representing instance name.
1448 metadata: Dict, maps a metadata name to its value.
1449 entry: String, ssh public key.
1450 """
1451 ssh_key_item = GetSshKeyFromMetadata(metadata)
1452 if ssh_key_item:
1453 # The ssh key exists in the metadata so update the reference to it
1454 # in the metadata. There may not be an actual ssh key value so
1455 # that's why we filter for None to avoid an empty line in front.
1456 ssh_key_item[_METADATA_KEY_VALUE] = "\n".join(
1457 filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry]))
1458 else:
1459 # Since there is no ssh key item in the metadata, we need to add it in.
1460 ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME,
1461 _METADATA_KEY_VALUE: entry}
1462 metadata[_ITEMS].append(ssh_key_item)
1463 utils.PrintColorString(
1464 "Ssh public key doesn't exist in the instance(%s), adding it."
1465 % instance, utils.TextColors.WARNING)
1466 self.SetInstanceMetadata(zone, instance, metadata)
1467
1468
def RsaNotInMetadata(metadata, entry):
    """Tell whether an ssh public key is missing from the sshKeys value.

    An empty items list is created on |metadata| when it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.
        entry: String, ssh public key.

    Returns:
        Boolean. True if ssh public key doesn't exist in metadata.
    """
    items = metadata.setdefault(_ITEMS, [])
    return not any(
        item[_METADATA_KEY] == _SSH_KEYS_NAME
        and entry in item[_METADATA_KEY_VALUE]
        for item in items)
1484
1485
def GetSshKeyFromMetadata(metadata):
    """Find the ssh key item in the metadata.

    An empty items list is created on |metadata| when it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.

    Returns:
        Dict of ssh_key_item in metadata, None if can't find the ssh key item
        in metadata.
    """
    ssh_key_items = (
        item for item in metadata.setdefault(_ITEMS, [])
        if item.get(_METADATA_KEY, '') == _SSH_KEYS_NAME)
    # The first match wins; None when nothing matches.
    return next(ssh_key_items, None)
1500
1501
def GetRsaKey(ssh_rsa_path):
    """Read and verify the public rsa key stored at a path.

    Args:
        ssh_rsa_path: String, The absolute path to public rsa key.

    Returns:
        String, rsa key.

    Raises:
        errors.DriverError: RSA file does not exist.
    """
    expanded_path = os.path.expanduser(ssh_rsa_path)
    if not os.path.exists(expanded_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % expanded_path)

    with open(expanded_path) as key_file:
        rsa = key_file.read()
    # Surrounding whitespace must be removed before the key is processed;
    # an empty read is passed on unchanged.
    if rsa:
        rsa = rsa.strip()
    utils.VerifyRsaPubKey(rsa)
    return rsa
1525 return rsa