blob: ada5306feb905bf783806c6c522bab3c95ddac85 [file] [log] [blame]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070016"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
Kevin Cheng5c124ec2018-05-16 13:28:51 -070028# pylint: disable=too-many-lines
Kevin Cheng86d43c72018-08-30 10:59:14 -070029import collections
Fang Dengcef4b112017-03-02 11:20:17 -080030import copy
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070031import functools
32import logging
33import os
34
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070035from acloud.internal.lib import base_cloud_client
36from acloud.internal.lib import utils
37from acloud.public import errors
38
39logger = logging.getLogger(__name__)
40
Kevin Chengb5963882018-05-09 00:06:27 -070041_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
42
Kevin Cheng5c124ec2018-05-16 13:28:51 -070043BASE_DISK_ARGS = {
44 "type": "PERSISTENT",
45 "boot": True,
46 "mode": "READ_WRITE",
47 "autoDelete": True,
48 "initializeParams": {},
49}
50
Kevin Cheng86d43c72018-08-30 10:59:14 -070051IP = collections.namedtuple("IP", ["external", "internal"])
52
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070053
class OperationScope(object):
    """Enum-like holder for the scope an operation lives in.

    The values select which operations API ComputeClient queries: global,
    per-region or per-zone.
    """
    GLOBAL = "global"
    REGION = "region"
    ZONE = "zone"
59
60
class PersistentDiskType(object):
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    SSD = "pd-ssd"
    STANDARD = "pd-standard"
69
70
class ImageStatus(object):
    """Enum-like holder for the status values of an image."""
    FAILED = "FAILED"
    PENDING = "PENDING"
    READY = "READY"
76
77
def _IsFingerPrintError(exc):
    """Tell whether an exception is an HTTP 412 ("Precondition Failed") error.

    Used as the retry predicate for fingerprint conflicts.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only if exc is an errors.HttpError whose code is 412.
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
88
89
Kevin Cheng5c124ec2018-05-16 13:28:51 -070090# pylint: disable=too-many-public-methods
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070091class ComputeClient(base_cloud_client.BaseCloudApiClient):
92 """Client that manages GCE."""
93
94 # API settings, used by BaseCloudApiClient.
95 API_NAME = "compute"
96 API_VERSION = "v1"
herbertxue308f7662018-05-18 03:25:58 +000097 SCOPE = " ".join([
98 "https://www.googleapis.com/auth/compute",
99 "https://www.googleapis.com/auth/devstorage.read_write"
100 ])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700101 # Default settings for gce operations
102 DEFAULT_INSTANCE_SCOPE = [
103 "https://www.googleapis.com/auth/devstorage.read_only",
104 "https://www.googleapis.com/auth/logging.write"
105 ]
Kevin Chengb5963882018-05-09 00:06:27 -0700106 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins
107 OPERATION_POLL_INTERVAL_SECS = 20
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700108 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
Fang Dengcef4b112017-03-02 11:20:17 -0800109 ACCESS_DENIED_CODE = 403
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700110
111 def __init__(self, acloud_config, oauth2_credentials):
112 """Initialize.
113
114 Args:
115 acloud_config: An AcloudConfig object.
116 oauth2_credentials: An oauth2client.OAuth2Credentials instance.
117 """
118 super(ComputeClient, self).__init__(oauth2_credentials)
119 self._project = acloud_config.project
120
121 def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
122 """Get status of an operation.
123
124 Args:
125 operation: An Operation resource in the format of json.
126 operation_scope: A value from OperationScope, "zone", "region",
127 or "global".
128 scope_name: If operation_scope is "zone" or "region", this should be
129 the name of the zone or region, e.g. "us-central1-f".
130
131 Returns:
132 Status of the operation, one of "DONE", "PENDING", "RUNNING".
133
134 Raises:
135 errors.DriverError: if the operation fails.
136 """
137 operation_name = operation["name"]
138 if operation_scope == OperationScope.GLOBAL:
herbertxue308f7662018-05-18 03:25:58 +0000139 api = self.service.globalOperations().get(
140 project=self._project, operation=operation_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700141 result = self.Execute(api)
142 elif operation_scope == OperationScope.ZONE:
herbertxue308f7662018-05-18 03:25:58 +0000143 api = self.service.zoneOperations().get(
144 project=self._project,
145 operation=operation_name,
146 zone=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700147 result = self.Execute(api)
148 elif operation_scope == OperationScope.REGION:
herbertxue308f7662018-05-18 03:25:58 +0000149 api = self.service.regionOperations().get(
150 project=self._project,
151 operation=operation_name,
152 region=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700153 result = self.Execute(api)
154
155 if result.get("error"):
156 errors_list = result["error"]["errors"]
herbertxue308f7662018-05-18 03:25:58 +0000157 raise errors.DriverError(
158 "Get operation state failed, errors: %s" % str(errors_list))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700159 return result["status"]
160
161 def WaitOnOperation(self, operation, operation_scope, scope_name=None):
162 """Wait for an operation to finish.
163
164 Args:
165 operation: An Operation resource in the format of json.
166 operation_scope: A value from OperationScope, "zone", "region",
167 or "global".
168 scope_name: If operation_scope is "zone" or "region", this should be
169 the name of the zone or region, e.g. "us-central1-f".
170 """
171 timeout_exception = errors.GceOperationTimeoutError(
172 "Operation hits timeout, did not complete within %d secs." %
173 self.OPERATION_TIMEOUT_SECS)
174 utils.PollAndWait(
175 func=self._GetOperationStatus,
176 expected_return="DONE",
177 timeout_exception=timeout_exception,
178 timeout_secs=self.OPERATION_TIMEOUT_SECS,
179 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
180 operation=operation,
181 operation_scope=operation_scope,
182 scope_name=scope_name)
183
184 def GetProject(self):
185 """Get project information.
186
187 Returns:
188 A project resource in json.
189 """
190 api = self.service.projects().get(project=self._project)
191 return self.Execute(api)
192
193 def GetDisk(self, disk_name, zone):
194 """Get disk information.
195
196 Args:
197 disk_name: A string.
198 zone: String, name of zone.
199
200 Returns:
201 An disk resource in json.
202 https://cloud.google.com/compute/docs/reference/latest/disks#resource
203 """
herbertxue308f7662018-05-18 03:25:58 +0000204 api = self.service.disks().get(
205 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700206 return self.Execute(api)
207
208 def CheckDiskExists(self, disk_name, zone):
209 """Check if disk exists.
210
211 Args:
212 disk_name: A string
213 zone: String, name of zone.
214
215 Returns:
216 True if disk exists, otherwise False.
217 """
218 try:
219 self.GetDisk(disk_name, zone)
220 exists = True
221 except errors.ResourceNotFoundError:
222 exists = False
223 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
224 exists)
225 return exists
226
herbertxue308f7662018-05-18 03:25:58 +0000227 def CreateDisk(self,
228 disk_name,
229 source_image,
230 size_gb,
231 zone,
232 source_project=None,
233 disk_type=PersistentDiskType.STANDARD):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700234 """Create a gce disk.
235
236 Args:
herbertxue308f7662018-05-18 03:25:58 +0000237 disk_name: String
238 source_image: String, name of the image.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700239 size_gb: Integer, size in gb.
herbertxue308f7662018-05-18 03:25:58 +0000240 zone: String, name of the zone, e.g. us-central1-b.
Kevin Chengb5963882018-05-09 00:06:27 -0700241 source_project: String, required if the image is located in a different
242 project.
243 disk_type: String, a value from PersistentDiskType, STANDARD
244 for regular hard disk or SSD for solid state disk.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700245 """
Kevin Chengb5963882018-05-09 00:06:27 -0700246 source_project = source_project or self._project
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700247 source_image = "projects/%s/global/images/%s" % (
Kevin Chengb5963882018-05-09 00:06:27 -0700248 source_project, source_image) if source_image else None
249 logger.info("Creating disk %s, size_gb: %d, source_image: %s",
250 disk_name, size_gb, str(source_image))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700251 body = {
252 "name": disk_name,
253 "sizeGb": size_gb,
herbertxue308f7662018-05-18 03:25:58 +0000254 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
255 disk_type),
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700256 }
herbertxue308f7662018-05-18 03:25:58 +0000257 api = self.service.disks().insert(
258 project=self._project,
259 sourceImage=source_image,
260 zone=zone,
261 body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700262 operation = self.Execute(api)
263 try:
herbertxue308f7662018-05-18 03:25:58 +0000264 self.WaitOnOperation(
265 operation=operation,
266 operation_scope=OperationScope.ZONE,
267 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700268 except errors.DriverError:
269 logger.error("Creating disk failed, cleaning up: %s", disk_name)
270 if self.CheckDiskExists(disk_name, zone):
271 self.DeleteDisk(disk_name, zone)
272 raise
273 logger.info("Disk %s has been created.", disk_name)
274
275 def DeleteDisk(self, disk_name, zone):
276 """Delete a gce disk.
277
278 Args:
279 disk_name: A string, name of disk.
280 zone: A string, name of zone.
281 """
282 logger.info("Deleting disk %s", disk_name)
herbertxue308f7662018-05-18 03:25:58 +0000283 api = self.service.disks().delete(
284 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700285 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000286 self.WaitOnOperation(
287 operation=operation,
288 operation_scope=OperationScope.ZONE,
289 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700290 logger.info("Deleted disk %s", disk_name)
291
292 def DeleteDisks(self, disk_names, zone):
293 """Delete multiple disks.
294
295 Args:
296 disk_names: A list of disk names.
297 zone: A string, name of zone.
298
299 Returns:
300 A tuple, (deleted, failed, error_msgs)
301 deleted: A list of names of disks that have been deleted.
302 failed: A list of names of disks that we fail to delete.
303 error_msgs: A list of failure messages.
304 """
305 if not disk_names:
306 logger.warn("Nothing to delete. Arg disk_names is not provided.")
307 return [], [], []
308 # Batch send deletion requests.
309 logger.info("Deleting disks: %s", disk_names)
310 delete_requests = {}
311 for disk_name in set(disk_names):
herbertxue308f7662018-05-18 03:25:58 +0000312 request = self.service.disks().delete(
313 project=self._project, disk=disk_name, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700314 delete_requests[disk_name] = request
herbertxue308f7662018-05-18 03:25:58 +0000315 return self._BatchExecuteAndWait(
316 delete_requests, OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700317
318 def ListDisks(self, zone, disk_filter=None):
319 """List disks.
320
321 Args:
322 zone: A string, representing zone name. e.g. "us-central1-f"
323 disk_filter: A string representing a filter in format of
324 FIELD_NAME COMPARISON_STRING LITERAL_STRING
325 e.g. "name ne example-instance"
326 e.g. "name eq "example-instance-[0-9]+""
327
328 Returns:
329 A list of disks.
330 """
herbertxue308f7662018-05-18 03:25:58 +0000331 return self.ListWithMultiPages(
332 api_resource=self.service.disks().list,
333 project=self._project,
334 zone=zone,
335 filter=disk_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700336
herbertxue308f7662018-05-18 03:25:58 +0000337 def CreateImage(self,
338 image_name,
339 source_uri=None,
340 source_disk=None,
Kevin Chengb5963882018-05-09 00:06:27 -0700341 labels=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700342 """Create a Gce image.
343
344 Args:
herbertxue308f7662018-05-18 03:25:58 +0000345 image_name: String, name of image
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700346 source_uri: Full Google Cloud Storage URL where the disk image is
Kevin Chengb5963882018-05-09 00:06:27 -0700347 stored. e.g. "https://storage.googleapis.com/my-bucket/
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700348 avd-system-2243663.tar.gz"
Kevin Chengb5963882018-05-09 00:06:27 -0700349 source_disk: String, this should be the disk's selfLink value
350 (including zone and project), rather than the disk_name
351 e.g. https://www.googleapis.com/compute/v1/projects/
352 google.com:android-builds-project/zones/
353 us-east1-d/disks/<disk_name>
354 labels: Dict, will be added to the image's labels.
355
Kevin Chengb5963882018-05-09 00:06:27 -0700356 Raises:
357 errors.DriverError: For malformed request or response.
358 errors.GceOperationTimeoutError: Operation takes too long to finish.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700359 """
herbertxue308f7662018-05-18 03:25:58 +0000360 if self.CheckImageExists(image_name):
361 return
362 if (source_uri and source_disk) or (not source_uri
363 and not source_disk):
Kevin Chengb5963882018-05-09 00:06:27 -0700364 raise errors.DriverError(
365 "Creating image %s requires either source_uri %s or "
herbertxue308f7662018-05-18 03:25:58 +0000366 "source_disk %s but not both" % (image_name, source_uri,
367 source_disk))
Kevin Chengb5963882018-05-09 00:06:27 -0700368 elif source_uri:
herbertxue308f7662018-05-18 03:25:58 +0000369 logger.info("Creating image %s, source_uri %s", image_name,
370 source_uri)
Kevin Chengb5963882018-05-09 00:06:27 -0700371 body = {
372 "name": image_name,
373 "rawDisk": {
374 "source": source_uri,
375 },
376 }
377 else:
herbertxue308f7662018-05-18 03:25:58 +0000378 logger.info("Creating image %s, source_disk %s", image_name,
379 source_disk)
Kevin Chengb5963882018-05-09 00:06:27 -0700380 body = {
381 "name": image_name,
382 "sourceDisk": source_disk,
383 }
384 if labels is not None:
385 body["labels"] = labels
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700386 api = self.service.images().insert(project=self._project, body=body)
387 operation = self.Execute(api)
388 try:
herbertxue308f7662018-05-18 03:25:58 +0000389 self.WaitOnOperation(
390 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700391 except errors.DriverError:
392 logger.error("Creating image failed, cleaning up: %s", image_name)
393 if self.CheckImageExists(image_name):
394 self.DeleteImage(image_name)
395 raise
396 logger.info("Image %s has been created.", image_name)
397
Kevin Chengb5963882018-05-09 00:06:27 -0700398 @utils.RetryOnException(_IsFingerPrintError,
399 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
400 def SetImageLabels(self, image_name, new_labels):
401 """Update image's labels. Retry for finger print conflict.
402
403 Note: Decorator RetryOnException will retry the call for FingerPrint
404 conflict (HTTP error code 412). The fingerprint is used to detect
405 conflicts of GCE resource updates. The fingerprint is initially generated
406 by Compute Engine and changes after every request to modify or update
407 resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
408 updates).
409
410 Args:
411 image_name: A string, the image name.
412 new_labels: Dict, will be added to the image's labels.
413
414 Returns:
415 A GlobalOperation resouce.
416 https://cloud.google.com/compute/docs/reference/latest/globalOperations
417 """
418 image = self.GetImage(image_name)
419 labels = image.get("labels", {})
420 labels.update(new_labels)
421 body = {
422 "labels": labels,
423 "labelFingerprint": image["labelFingerprint"]
424 }
herbertxue308f7662018-05-18 03:25:58 +0000425 api = self.service.images().setLabels(
426 project=self._project, resource=image_name, body=body)
Kevin Chengb5963882018-05-09 00:06:27 -0700427 return self.Execute(api)
428
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700429 def CheckImageExists(self, image_name):
430 """Check if image exists.
431
432 Args:
433 image_name: A string
434
435 Returns:
436 True if image exists, otherwise False.
437 """
438 try:
439 self.GetImage(image_name)
440 exists = True
441 except errors.ResourceNotFoundError:
442 exists = False
443 logger.debug("CheckImageExists: image_name: %s, result: %s",
444 image_name, exists)
445 return exists
446
Kevin Chengb5963882018-05-09 00:06:27 -0700447 def GetImage(self, image_name, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700448 """Get image information.
449
450 Args:
451 image_name: A string
Kevin Chengb5963882018-05-09 00:06:27 -0700452 image_project: A string
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700453
454 Returns:
455 An image resource in json.
456 https://cloud.google.com/compute/docs/reference/latest/images#resource
457 """
herbertxue308f7662018-05-18 03:25:58 +0000458 api = self.service.images().get(
459 project=image_project or self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700460 return self.Execute(api)
461
462 def DeleteImage(self, image_name):
463 """Delete an image.
464
465 Args:
466 image_name: A string
467 """
468 logger.info("Deleting image %s", image_name)
herbertxue308f7662018-05-18 03:25:58 +0000469 api = self.service.images().delete(
470 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700471 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000472 self.WaitOnOperation(
473 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700474 logger.info("Deleted image %s", image_name)
475
476 def DeleteImages(self, image_names):
477 """Delete multiple images.
478
479 Args:
480 image_names: A list of image names.
481
482 Returns:
483 A tuple, (deleted, failed, error_msgs)
484 deleted: A list of names of images that have been deleted.
485 failed: A list of names of images that we fail to delete.
486 error_msgs: A list of failure messages.
487 """
488 if not image_names:
489 return [], [], []
490 # Batch send deletion requests.
491 logger.info("Deleting images: %s", image_names)
492 delete_requests = {}
493 for image_name in set(image_names):
herbertxue308f7662018-05-18 03:25:58 +0000494 request = self.service.images().delete(
495 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700496 delete_requests[image_name] = request
497 return self._BatchExecuteAndWait(delete_requests,
498 OperationScope.GLOBAL)
499
Kevin Chengb5963882018-05-09 00:06:27 -0700500 def ListImages(self, image_filter=None, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700501 """List images.
502
503 Args:
504 image_filter: A string representing a filter in format of
505 FIELD_NAME COMPARISON_STRING LITERAL_STRING
506 e.g. "name ne example-image"
507 e.g. "name eq "example-image-[0-9]+""
Kevin Chengb5963882018-05-09 00:06:27 -0700508 image_project: String. If not provided, will list images from the default
509 project. Otherwise, will list images from the given
510 project, which can be any arbitrary project where the
511 account has read access
512 (i.e. has the role "roles/compute.imageUser")
513
514 Read more about image sharing across project:
515 https://cloud.google.com/compute/docs/images/sharing-images-across-projects
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700516
517 Returns:
518 A list of images.
519 """
herbertxue308f7662018-05-18 03:25:58 +0000520 return self.ListWithMultiPages(
521 api_resource=self.service.images().list,
522 project=image_project or self._project,
523 filter=image_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700524
525 def GetInstance(self, instance, zone):
526 """Get information about an instance.
527
528 Args:
529 instance: A string, representing instance name.
530 zone: A string, representing zone name. e.g. "us-central1-f"
531
532 Returns:
533 An instance resource in json.
534 https://cloud.google.com/compute/docs/reference/latest/instances#resource
535 """
herbertxue308f7662018-05-18 03:25:58 +0000536 api = self.service.instances().get(
537 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700538 return self.Execute(api)
539
Kevin Chengb5963882018-05-09 00:06:27 -0700540 def AttachAccelerator(self, instance, zone, accelerator_count,
541 accelerator_type):
542 """Attach a GPU accelerator to the instance.
543
544 Note: In order for this to succeed the following must hold:
545 - The machine schedule must be set to "terminate" i.e:
546 SetScheduling(self, instance, zone, on_host_maintenance="terminate")
547 must have been called.
548 - The machine is not starting or running. i.e.
549 StopInstance(self, instance) must have been called.
550
551 Args:
552 instance: A string, representing instance name.
553 zone: String, name of zone.
554 accelerator_count: The number accelerators to be attached to the instance.
555 a value of 0 will detach all accelerators.
556 accelerator_type: The type of accelerator to attach. e.g.
557 "nvidia-tesla-k80"
558 """
559 body = {
560 "guestAccelerators": [{
herbertxue308f7662018-05-18 03:25:58 +0000561 "acceleratorType":
562 self.GetAcceleratorUrl(accelerator_type, zone),
563 "acceleratorCount":
564 accelerator_count
Kevin Chengb5963882018-05-09 00:06:27 -0700565 }]
566 }
567 api = self.service.instances().setMachineResources(
568 project=self._project, zone=zone, instance=instance, body=body)
569 operation = self.Execute(api)
570 try:
571 self.WaitOnOperation(
572 operation=operation,
573 operation_scope=OperationScope.ZONE,
574 scope_name=zone)
575 except errors.GceOperationTimeoutError:
576 logger.error("Attach instance failed: %s", instance)
577 raise
herbertxue308f7662018-05-18 03:25:58 +0000578 logger.info("%d x %s have been attached to instance %s.",
579 accelerator_count, accelerator_type, instance)
Kevin Chengb5963882018-05-09 00:06:27 -0700580
581 def AttachDisk(self, instance, zone, **kwargs):
582 """Attach the external disk to the instance.
583
584 Args:
585 instance: A string, representing instance name.
586 zone: String, name of zone.
587 **kwargs: The attachDisk request body. See "https://cloud.google.com/
588 compute/docs/reference/latest/instances/attachDisk" for detail.
589 {
590 "kind": "compute#attachedDisk",
591 "type": string,
592 "mode": string,
593 "source": string,
594 "deviceName": string,
595 "index": integer,
596 "boot": boolean,
597 "initializeParams": {
598 "diskName": string,
599 "sourceImage": string,
600 "diskSizeGb": long,
601 "diskType": string,
602 "sourceImageEncryptionKey": {
603 "rawKey": string,
604 "sha256": string
605 }
606 },
607 "autoDelete": boolean,
608 "licenses": [
609 string
610 ],
611 "interface": string,
612 "diskEncryptionKey": {
613 "rawKey": string,
614 "sha256": string
615 }
616 }
617
618 Returns:
619 An disk resource in json.
620 https://cloud.google.com/compute/docs/reference/latest/disks#resource
621
622
623 Raises:
624 errors.GceOperationTimeoutError: Operation takes too long to finish.
625 """
626 api = self.service.instances().attachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000627 project=self._project, zone=zone, instance=instance, body=kwargs)
Kevin Chengb5963882018-05-09 00:06:27 -0700628 operation = self.Execute(api)
629 try:
630 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000631 operation=operation,
632 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700633 scope_name=zone)
634 except errors.GceOperationTimeoutError:
635 logger.error("Attach instance failed: %s", instance)
636 raise
637 logger.info("Disk has been attached to instance %s.", instance)
638
639 def DetachDisk(self, instance, zone, disk_name):
640 """Attach the external disk to the instance.
641
642 Args:
643 instance: A string, representing instance name.
644 zone: String, name of zone.
645 disk_name: A string, the name of the detach disk.
646
647 Returns:
648 A ZoneOperation resource.
649 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
650
651 Raises:
652 errors.GceOperationTimeoutError: Operation takes too long to finish.
653 """
654 api = self.service.instances().detachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000655 project=self._project,
656 zone=zone,
657 instance=instance,
Kevin Chengb5963882018-05-09 00:06:27 -0700658 deviceName=disk_name)
659 operation = self.Execute(api)
660 try:
661 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000662 operation=operation,
663 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700664 scope_name=zone)
665 except errors.GceOperationTimeoutError:
666 logger.error("Detach instance failed: %s", instance)
667 raise
668 logger.info("Disk has been detached to instance %s.", instance)
669
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700670 def StartInstance(self, instance, zone):
671 """Start |instance| in |zone|.
672
673 Args:
674 instance: A string, representing instance name.
675 zone: A string, representing zone name. e.g. "us-central1-f"
676
677 Raises:
678 errors.GceOperationTimeoutError: Operation takes too long to finish.
679 """
herbertxue308f7662018-05-18 03:25:58 +0000680 api = self.service.instances().start(
681 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700682 operation = self.Execute(api)
683 try:
herbertxue308f7662018-05-18 03:25:58 +0000684 self.WaitOnOperation(
685 operation=operation,
686 operation_scope=OperationScope.ZONE,
687 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700688 except errors.GceOperationTimeoutError:
689 logger.error("Start instance failed: %s", instance)
690 raise
691 logger.info("Instance %s has been started.", instance)
692
693 def StartInstances(self, instances, zone):
694 """Start |instances| in |zone|.
695
696 Args:
697 instances: A list of strings, representing instance names's list.
698 zone: A string, representing zone name. e.g. "us-central1-f"
699
700 Returns:
701 A tuple, (done, failed, error_msgs)
702 done: A list of string, representing the names of instances that
703 have been executed.
704 failed: A list of string, representing the names of instances that
705 we failed to execute.
706 error_msgs: A list of string, representing the failure messages.
707 """
herbertxue308f7662018-05-18 03:25:58 +0000708 action = functools.partial(
709 self.service.instances().start, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700710 return self._BatchExecuteOnInstances(instances, zone, action)
711
712 def StopInstance(self, instance, zone):
713 """Stop |instance| in |zone|.
714
715 Args:
716 instance: A string, representing instance name.
717 zone: A string, representing zone name. e.g. "us-central1-f"
718
719 Raises:
720 errors.GceOperationTimeoutError: Operation takes too long to finish.
721 """
herbertxue308f7662018-05-18 03:25:58 +0000722 api = self.service.instances().stop(
723 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700724 operation = self.Execute(api)
725 try:
herbertxue308f7662018-05-18 03:25:58 +0000726 self.WaitOnOperation(
727 operation=operation,
728 operation_scope=OperationScope.ZONE,
729 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700730 except errors.GceOperationTimeoutError:
731 logger.error("Stop instance failed: %s", instance)
732 raise
733 logger.info("Instance %s has been terminated.", instance)
734
735 def StopInstances(self, instances, zone):
736 """Stop |instances| in |zone|.
737
738 Args:
Kevin Chengb5963882018-05-09 00:06:27 -0700739 instances: A list of strings, representing instance names's list.
740 zone: A string, representing zone name. e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700741
742 Returns:
743 A tuple, (done, failed, error_msgs)
744 done: A list of string, representing the names of instances that
745 have been executed.
746 failed: A list of string, representing the names of instances that
747 we failed to execute.
748 error_msgs: A list of string, representing the failure messages.
749 """
herbertxue308f7662018-05-18 03:25:58 +0000750 action = functools.partial(
751 self.service.instances().stop, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700752 return self._BatchExecuteOnInstances(instances, zone, action)
753
754 def SetScheduling(self,
755 instance,
756 zone,
757 automatic_restart=True,
758 on_host_maintenance="MIGRATE"):
759 """Update scheduling config |automatic_restart| and |on_host_maintenance|.
760
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700761 Args:
762 instance: A string, representing instance name.
763 zone: A string, representing zone name. e.g. "us-central1-f".
764 automatic_restart: Boolean, determine whether the instance will
765 automatically restart if it crashes or not,
766 default to True.
Kevin Chengb5963882018-05-09 00:06:27 -0700767 on_host_maintenance: enum["MIGRATE", "TERMINATE"]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700768 The instance's maintenance behavior, which
769 determines whether the instance is live
Kevin Chengb5963882018-05-09 00:06:27 -0700770 "MIGRATE" or "TERMINATE" when there is
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700771 a maintenance event.
772
773 Raises:
774 errors.GceOperationTimeoutError: Operation takes too long to finish.
775 """
herbertxue308f7662018-05-18 03:25:58 +0000776 body = {
777 "automaticRestart": automatic_restart,
778 "onHostMaintenance": on_host_maintenance
779 }
780 api = self.service.instances().setScheduling(
781 project=self._project, zone=zone, instance=instance, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700782 operation = self.Execute(api)
783 try:
herbertxue308f7662018-05-18 03:25:58 +0000784 self.WaitOnOperation(
785 operation=operation,
786 operation_scope=OperationScope.ZONE,
787 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700788 except errors.GceOperationTimeoutError:
789 logger.error("Set instance scheduling failed: %s", instance)
790 raise
herbertxue308f7662018-05-18 03:25:58 +0000791 logger.info(
792 "Instance scheduling changed:\n"
793 " automaticRestart: %s\n"
794 " onHostMaintenance: %s\n",
795 str(automatic_restart).lower(), on_host_maintenance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700796
797 def ListInstances(self, zone, instance_filter=None):
798 """List instances.
799
800 Args:
801 zone: A string, representing zone name. e.g. "us-central1-f"
802 instance_filter: A string representing a filter in format of
803 FIELD_NAME COMPARISON_STRING LITERAL_STRING
804 e.g. "name ne example-instance"
805 e.g. "name eq "example-instance-[0-9]+""
806
807 Returns:
808 A list of instances.
809 """
810 return self.ListWithMultiPages(
811 api_resource=self.service.instances().list,
812 project=self._project,
813 zone=zone,
814 filter=instance_filter)
815
def SetSchedulingInstances(self,
                           instances,
                           zone,
                           automatic_restart=True,
                           on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    Builds one setScheduling request per instance and hands them to
    _BatchExecuteOnInstances.

    See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.

    Args:
        instances: A list of string, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             migrated or terminated when there is
                             a maintenance event.

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
              have been executed.
        failed: A list of string, representing the names of instances that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # The scheduling field is spelled "onHostMaintenance" (lower camel
    # case), the same key SetScheduling sends; the previous
    # "OnHostMaintenance" spelling did not match the API field name.
    body = {
        "automaticRestart": automatic_restart,
        "onHostMaintenance": on_host_maintenance,
    }
    # 'instance' is intentionally left unbound; the batch helper supplies
    # it once per instance name.
    action = functools.partial(
        self.service.instances().setScheduling,
        project=self._project,
        zone=zone,
        body=body)
    return self._BatchExecuteOnInstances(instances, zone, action)
855
856 def _BatchExecuteOnInstances(self, instances, zone, action):
857 """Batch processing operations requiring computing time.
858
859 Args:
860 instances: A list of instance names.
861 zone: A string, e.g. "us-central1-f".
862 action: partial func, all kwargs for this gcloud action has been
863 defined in the caller function (e.g. See "StartInstances")
864 except 'instance' which will be defined by iterating the
865 |instances|.
866
867 Returns:
868 A tuple, (done, failed, error_msgs)
869 done: A list of string, representing the names of instances that
870 have been executed.
871 failed: A list of string, representing the names of instances that
872 we failed to execute.
873 error_msgs: A list of string, representing the failure messages.
874 """
875 if not instances:
876 return [], [], []
877 # Batch send requests.
878 logger.info("Batch executing instances: %s", instances)
879 requests = {}
880 for instance_name in set(instances):
881 requests[instance_name] = action(instance=instance_name)
herbertxue308f7662018-05-18 03:25:58 +0000882 return self._BatchExecuteAndWait(
883 requests, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700884
def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
    """Batch processing requests and wait on the operation.

    Args:
        requests: A dictionary. The key is a string representing the resource
                  name. For example, an instance name, or an image name.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the resource names that have
              been executed.
        failed: A list of string, representing resource names that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # BatchExecute's result is consumed below as a mapping of
    # resource name -> (operation, error).
    results = self.BatchExecute(requests)
    failed = []
    error_msgs = []
    # dict.items() (rather than the Python 2 only iteritems()) keeps this
    # loop working on both Python 2 and Python 3.
    for resource_name, (_, error) in results.items():
        if error is not None:
            failed.append(resource_name)
            error_msgs.append(str(error))
    done = []
    # Wait for the executing operations to finish.
    logger.info("Waiting for executing operations")
    # Iterating the dict directly yields its keys on Python 2 and 3 alike.
    for resource_name in requests:
        operation, _ = results[resource_name]
        if operation:
            try:
                self.WaitOnOperation(operation, operation_scope,
                                     scope_name)
                done.append(resource_name)
            except errors.DriverError as exc:
                # A failed wait is recorded, not raised, so the remaining
                # operations are still waited on.
                failed.append(resource_name)
                error_msgs.append(str(exc))
    return done, failed, error_msgs
925
def ListZones(self):
    """List the zones of the project.

    Returns:
        The response of the zones().list call for this project. For example:
        {
            "id": "projects/google.com%3Aandroid-build-staging/zones",
            "kind": "compute#zoneList",
            "selfLink": "https://www.googleapis.com/compute/v1/projects/"
                        "google.com:android-build-staging/zones",
            "items": [
                {
                    'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
                    'description': 'asia-east1-c',
                    'id': '2222',
                    'kind': 'compute#zone',
                    'name': 'asia-east1-c',
                    'region': 'https://www.googleapis.com/compute/v1/projects/'
                              'google.com:android-build-staging/regions/asia-east1',
                    'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
                                'google.com:android-build-staging/zones/asia-east1-c',
                    'status': 'UP'
                }, ...]
        }
        See cloud cluster's api/mixer_zones.proto
    """
    return self.Execute(self.service.zones().list(project=self._project))
965
def ListRegions(self):
    """List all the regions for a project.

    Returns:
        The response of the regions().list call for this project: a
        dictionary containing the regions and additional data. See this
        link for the detailed response:
        https://cloud.google.com/compute/docs/reference/latest/regions/list.
        Example:
        {
            'items': [{
                'name':
                    'us-central1',
                'quotas': [{
                    'usage': 2.0,
                    'limit': 24.0,
                    'metric': 'CPUS'
                }, {
                    'usage': 1.0,
                    'limit': 23.0,
                    'metric': 'IN_USE_ADDRESSES'
                }, {
                    'usage': 209.0,
                    'limit': 10240.0,
                    'metric': 'DISKS_TOTAL_GB'
                }, {
                    'usage': 1000.0,
                    'limit': 20000.0,
                    'metric': 'INSTANCES'
                }]
            },..]
        }
    """
    return self.Execute(self.service.regions().list(project=self._project))
1000
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001001 def _GetNetworkArgs(self, network, zone):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001002 """Helper to generate network args that is used to create an instance.
1003
1004 Args:
1005 network: A string, e.g. "default".
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001006 zone: String, representing zone name, e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001007
1008 Returns:
1009 A dictionary representing network args.
1010 """
1011 return {
1012 "network": self.GetNetworkUrl(network),
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001013 "subnetwork": self.GetSubnetworkUrl(network, zone),
herbertxue308f7662018-05-18 03:25:58 +00001014 "accessConfigs": [{
1015 "name": "External NAT",
1016 "type": "ONE_TO_ONE_NAT"
1017 }]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001018 }
1019
def _GetDiskArgs(self,
                 disk_name,
                 image_name,
                 image_project=None,
                 disk_size_gb=None):
    """Build the boot disk args used when creating an instance.

    Args:
        disk_name: A string
        image_name: A string
        image_project: A string
        disk_size_gb: An integer

    Returns:
        List holding a single dict of disk args.
    """
    # Deep copy so the shared BASE_DISK_ARGS template is never modified.
    boot_disk = copy.deepcopy(BASE_DISK_ARGS)
    source_image = self.GetImage(image_name, image_project)["selfLink"]
    boot_disk["initializeParams"] = {
        "diskName": disk_name,
        "sourceImage": source_image,
    }
    # TODO: Remove this check once it's validated that we can either pass in
    # a None diskSizeGb or we find an appropriate default val.
    if disk_size_gb:
        boot_disk["diskSizeGb"] = disk_size_gb
    return [boot_disk]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001047
herbertxue308f7662018-05-18 03:25:58 +00001048 def _GetExtraDiskArgs(self, extra_disk_name, zone):
1049 """Get extra disk arg for given disk.
1050
1051 Args:
1052 extra_disk_name: String, name of the disk.
1053 zone: String, representing zone name, e.g. "us-central1-f"
1054
1055 Returns:
1056 A dictionary of disk args.
1057 """
1058 return [{
1059 "type": "PERSISTENT",
1060 "mode": "READ_WRITE",
1061 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1062 extra_disk_name),
1063 "autoDelete": True,
1064 "boot": False,
1065 "interface": "SCSI",
1066 "deviceName": extra_disk_name,
1067 }]
1068
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001069 # pylint: disable=too-many-locals
def CreateInstance(self,
                   instance,
                   image_name,
                   machine_type,
                   metadata,
                   network,
                   zone,
                   disk_args=None,
                   image_project=None,
                   gpu=None,
                   extra_disk_name=None,
                   labels=None):
    """Create a gce instance with a gce image.

    Builds the instance body, issues instances().insert and waits on the
    resulting zone operation.

    Args:
        instance: String, instance name.
        image_name: String, source image used to create this disk.
        machine_type: String, representing machine_type,
                      e.g. "n1-standard-1"
        metadata: Dict, maps a metadata name to its value.
        network: String, representing network name, e.g. "default"
        zone: String, representing zone name, e.g. "us-central1-f"
        disk_args: A list of disk args, see _GetDiskArgs for example,
                   if None (or empty), will create a disk using the given
                   image. The list passed in is not modified.
        image_project: String, name of the project where the image
                       belongs. Assume the default project if None.
        gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
             None no gpus will be attached. For more details see:
             https://cloud.google.com/compute/docs/gpus/add-gpus
        extra_disk_name: String,the name of the extra disk to attach.
        labels: Dict, will be added to the instance's labels.
    """
    # Work on a copy of caller-supplied disk args: extending the caller's
    # own list below would leak the extra disk into any later reuse of it.
    if disk_args:
        disk_args = list(disk_args)
    else:
        disk_args = self._GetDiskArgs(instance, image_name, image_project)
    if extra_disk_name:
        disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
    body = {
        "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
        "name": instance,
        "networkInterfaces": [self._GetNetworkArgs(network, zone)],
        "disks": disk_args,
        "serviceAccounts": [{
            "email": "default",
            "scopes": self.DEFAULT_INSTANCE_SCOPE
        }],
    }

    if labels is not None:
        body["labels"] = labels
    if gpu:
        body["guestAccelerators"] = [{
            "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
            "acceleratorCount": 1
        }]
        # Instances with GPUs cannot live migrate because they are assigned
        # to specific hardware devices.
        body["scheduling"] = {"onHostMaintenance": "terminate"}
    if metadata:
        # dict.items() (rather than the Python 2 only iteritems()) works
        # on both Python 2 and Python 3.
        metadata_list = [{
            "key": key,
            "value": val
        } for key, val in metadata.items()]
        body["metadata"] = {"items": metadata_list}
    logger.info("Creating instance: project %s, zone %s, body:%s",
                self._project, zone, body)
    api = self.service.instances().insert(
        project=self._project, zone=zone, body=body)
    operation = self.Execute(api)
    self.WaitOnOperation(
        operation, operation_scope=OperationScope.ZONE, scope_name=zone)
    logger.info("Instance %s has been created.", instance)
1142
def DeleteInstance(self, instance, zone):
    """Delete one gce instance and wait on the resulting zone operation.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f"
    """
    logger.info("Deleting instance: %s", instance)
    delete_request = self.service.instances().delete(
        project=self._project, zone=zone, instance=instance)
    delete_operation = self.Execute(delete_request)
    self.WaitOnOperation(
        delete_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Deleted instance: %s", instance)
1157
def DeleteInstances(self, instances, zone):
    """Delete several instances in one batch.

    Args:
        instances: A list of instance names.
        zone: A string, e.g. "us-central1-f".

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of instances that have been deleted.
        failed: A list of names of instances that we fail to delete.
        error_msgs: A list of failure messages.
    """
    delete_api = self.service.instances().delete
    # 'instance' is left unbound; the batch helper fills it in per name.
    delete_one = functools.partial(
        delete_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, delete_one)
1174
def ResetInstance(self, instance, zone):
    """Reset one gce instance and wait on the resulting zone operation.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f".
    """
    logger.info("Resetting instance: %s", instance)
    reset_request = self.service.instances().reset(
        project=self._project, zone=zone, instance=instance)
    reset_operation = self.Execute(reset_request)
    self.WaitOnOperation(
        reset_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Instance has been reset: %s", instance)
1189
def GetMachineType(self, machine_type, zone):
    """Fetch the resource describing a given machine type.

    Args:
        machine_type: A string, name of the machine type.
        zone: A string, e.g. "us-central1-f"

    Returns:
        A machine type resource in json.
        https://cloud.google.com/compute/docs/reference/latest/
        machineTypes#resource
    """
    request = self.service.machineTypes().get(
        project=self._project, zone=zone, machineType=machine_type)
    return self.Execute(request)
1205
def GetAcceleratorUrl(self, accelerator_type, zone):
    """Get URL for a given type of accelerator.

    Args:
        accelerator_type: A string, representing the accelerator, e.g
                          "nvidia-tesla-k80"
        zone: A string representing a zone, e.g. "us-west1-b"

    Returns:
        The "selfLink" of the accelerator type resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/zones/
        us-west1-b/acceleratorTypes/nvidia-tesla-k80
    """
    request = self.service.acceleratorTypes().get(
        project=self._project, zone=zone, acceleratorType=accelerator_type)
    return self.Execute(request)["selfLink"]
1223
def GetNetworkUrl(self, network):
    """Get URL for a given network.

    Args:
        network: A string, representing network name, e.g "default"

    Returns:
        The "selfLink" of the network resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/
        global/networks/default
    """
    request = self.service.networks().get(
        project=self._project, network=network)
    return self.Execute(request)["selfLink"]
1239
def GetSubnetworkUrl(self, network, zone):
    """Get the subnetwork URL for a given network and zone.

    Return the subnetwork for the network in the specified region that the
    specified zone resides in. If there is no subnetwork for the specified
    zone, raise an exception.

    Args:
        network: A string, representing network name, e.g "default"
        zone: String, representing zone name, e.g. "us-central1-f"

    Returns:
        A URL that points to the subnetwork resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/
        regions/us-central1/subnetworks/default

    Raises:
        errors.NoSubnetwork: When no subnetwork exists for the zone
                             specified.
    """
    api = self.service.networks().get(
        project=self._project, network=network)
    result = self.Execute(api)
    # The region is the zone name minus its trailing "-<letter>" part.
    region = zone.rsplit("-", 1)[0]
    # Match the region as a full path segment of the subnetwork URL. A bare
    # substring test would also accept a subnetwork of another region whose
    # name merely contains this region's name.
    region_segment = "/regions/%s/" % region
    # A network response without a "subnetworks" entry is treated as having
    # no subnetworks, so the documented NoSubnetwork error is raised.
    for subnetwork in result.get("subnetworks", []):
        if region_segment in subnetwork:
            return subnetwork
    raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
                              (network, region))
1269
def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
    """Compare two machine types metric by metric.

    Each metric named in MACHINE_SIZE_METRICS is compared in order.

    Args:
        machine_type_1: A string representing a machine type, e.g. n1-standard-1
        machine_type_2: A string representing a machine type, e.g. n1-standard-1
        zone: A string representing a zone, e.g. "us-central1-f"

    Returns:
        -1 if any metric of machine size of the first type is smaller than
            the second type.
        0 if all metrics of machine size are equal.
        1 if at least one metric of machine size of the first type is
            greater than the second type and all metrics of first type are
            greater or equal to the second type.

    Raises:
        errors.DriverError: For malformed response.
    """
    first_info = self.GetMachineType(machine_type_1, zone)
    second_info = self.GetMachineType(machine_type_2, zone)
    verdict = 0
    for metric in self.MACHINE_SIZE_METRICS:
        has_metric = metric in first_info and metric in second_info
        if not has_metric:
            raise errors.DriverError(
                "Malformed machine size record: Can't find '%s' in %s or %s"
                % (metric, first_info, second_info))
        delta = first_info[metric] - second_info[metric]
        # A single smaller metric decides the comparison immediately.
        if delta < 0:
            return -1
        if delta > 0:
            verdict = 1
    return verdict
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001303
def GetSerialPortOutput(self, instance, zone, port=1):
    """Read the serial port output of an instance.

    Args:
        instance: string, instance name.
        zone: string, zone name.
        port: int, which COM port to read from, 1-4, default to 1.

    Returns:
        String, the "contents" field of the response.

    Raises:
        errors.DriverError: When the response has no "contents" field.
    """
    request = self.service.instances().getSerialPortOutput(
        project=self._project, zone=zone, instance=instance, port=port)
    response = self.Execute(request)
    if "contents" in response:
        return response["contents"]
    raise errors.DriverError(
        "Malformed response for GetSerialPortOutput: %s" % response)
1325
def GetInstanceNamesByIPs(self, ips, zone):
    """Map each given IP to the name of the instance that owns it.

    This function will go through all instances, which
    could be slow if there are too many instances. However, currently
    GCE doesn't support search for instance by IP.

    Args:
        ips: A set of IPs.
        zone: String, name of the zone.

    Returns:
        A dictionary where key is IP and value is instance name or None
        if instance is not found for the given IP.
    """
    # Every requested IP starts out unresolved.
    names_by_ip = {ip: None for ip in ips}
    for gce_instance in self.ListInstances(zone):
        try:
            first_nic = gce_instance["networkInterfaces"][0]
            nat_ip = first_nic["accessConfigs"][0]["natIP"]
            if nat_ip in ips:
                names_by_ip[nat_ip] = gce_instance["name"]
        except (IndexError, KeyError) as e:
            # An instance lacking the expected fields is logged and skipped.
            logger.error("Could not get instance names by ips: %s", str(e))
    return names_by_ip
1351
def GetInstanceIP(self, instance, zone):
    """Get the internal and external IP of an instance by name.

    Both addresses are read from the instance's first network interface;
    the external one comes from that interface's first access config.

    Args:
        instance: String, representing instance name.
        zone: String, name of the zone.

    Returns:
        NamedTuple of (internal, external) IP of the instance.
    """
    primary_nic = self.GetInstance(instance, zone)["networkInterfaces"][0]
    return IP(
        internal=primary_nic["networkIP"],
        external=primary_nic["accessConfigs"][0]["natIP"])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001366
def SetCommonInstanceMetadata(self, body):
    """Set project-wide metadata and wait on the resulting global operation.

    Args:
        body: Metadata body.
              metadata is in the following format.
              {
                "kind": "compute#metadata",
                "fingerprint": "a-23icsyx4E=",
                "items": [
                  {
                    "key": "google-compute-default-region",
                    "value": "us-central1"
                  }, ...
                ]
              }
    """
    request = self.service.projects().setCommonInstanceMetadata(
        project=self._project, body=body)
    self.WaitOnOperation(
        self.Execute(request), operation_scope=OperationScope.GLOBAL)
1388
def AddSshRsa(self, user, ssh_rsa_path):
    """Add the public rsa key to the project's metadata.

    The key is appended as a "<user>:<key>" line to the "sshKeys" item of
    the project's common instance metadata, creating the item if needed.
    Compute engine instances that are created after will
    by default contain the key.

    Args:
        user: the name of the user which the key belongs to.
        ssh_rsa_path: The absolute path to public rsa key.

    Raises:
        errors.DriverError: When |ssh_rsa_path| does not exist.
    """
    if not os.path.exists(ssh_rsa_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % ssh_rsa_path)

    logger.info("Adding ssh rsa key from %s to project %s for user: %s",
                ssh_rsa_path, self._project, user)
    project = self.GetProject()
    with open(ssh_rsa_path) as key_file:
        rsa = key_file.read()
    if rsa:
        rsa = rsa.strip()
    utils.VerifyRsaPubKey(rsa)

    metadata = project["commonInstanceMetadata"]
    existing_items = metadata.setdefault("items", [])
    # Reuse the existing "sshKeys" item when there is one.
    sshkey_item = next(
        (item for item in existing_items if item["key"] == "sshKeys"), None)
    if sshkey_item is None:
        sshkey_item = {"key": "sshKeys", "value": ""}
        metadata["items"].append(sshkey_item)

    entry = "%s:%s" % (user, rsa)
    logger.debug("New RSA entry: %s", entry)
    # The outer strip() drops the leading newline left when the previous
    # value was empty.
    previous_keys = sshkey_item["value"].strip()
    sshkey_item["value"] = "\n".join([previous_keys, entry]).strip()
    self.SetCommonInstanceMetadata(metadata)
Fang Dengcef4b112017-03-02 11:20:17 -08001424
def CheckAccess(self):
    """Check if the user has read access to the cloud project.

    Issues a zones().list call with the access-denied code taken out of
    the retry codes for this one call.

    Returns:
        True if the user has at least read access to the project.
        False otherwise.

    Raises:
        errors.HttpError if other unexpected error happens when
        accessing the project.
    """
    probe = self.service.zones().list(project=self._project)
    # Copy first so the shared RETRY_HTTP_CODES is left untouched.
    codes_to_retry = copy.copy(self.RETRY_HTTP_CODES)
    codes_to_retry.remove(self.ACCESS_DENIED_CODE)
    try:
        self.Execute(probe, retry_http_codes=codes_to_retry)
    except errors.HttpError as e:
        if e.code != self.ACCESS_DENIED_CODE:
            raise
        return False
    return True