blob: cd30763e558ad70526d1005d3072465cca2249f7 [file] [log] [blame]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070016"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
Kevin Cheng5c124ec2018-05-16 13:28:51 -070028# pylint: disable=too-many-lines
Kevin Cheng86d43c72018-08-30 10:59:14 -070029import collections
Fang Dengcef4b112017-03-02 11:20:17 -080030import copy
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070031import functools
32import logging
33import os
34
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070035from acloud.internal.lib import base_cloud_client
36from acloud.internal.lib import utils
37from acloud.public import errors
38
39logger = logging.getLogger(__name__)
40
Kevin Chengb5963882018-05-09 00:06:27 -070041_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
42
Kevin Cheng5c124ec2018-05-16 13:28:51 -070043BASE_DISK_ARGS = {
44 "type": "PERSISTENT",
45 "boot": True,
46 "mode": "READ_WRITE",
47 "autoDelete": True,
48 "initializeParams": {},
49}
50
Kevin Cheng86d43c72018-08-30 10:59:14 -070051IP = collections.namedtuple("IP", ["external", "internal"])
52
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070053
class OperationScope(object):
    """Enum-like holder for the scope an operation belongs to.

    The value selects which operations API (zonal, regional or global) is
    queried when the status of an operation is fetched.
    """
    ZONE = "zone"
    REGION = "region"
    GLOBAL = "global"
59
60
class PersistentDiskType(object):
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    STANDARD = "pd-standard"
    SSD = "pd-ssd"
69
70
class ImageStatus(object):
    """Enum-like holder for the possible status values of an image."""
    PENDING = "PENDING"
    READY = "READY"
    FAILED = "FAILED"
76
77
def _IsFingerPrintError(exc):
    """Tell whether |exc| is an HTTP 412 ("Precondition Failed") error.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only if exc is an errors.HttpError whose code is 412.
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
88
89
Kevin Cheng5c124ec2018-05-16 13:28:51 -070090# pylint: disable=too-many-public-methods
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070091class ComputeClient(base_cloud_client.BaseCloudApiClient):
92 """Client that manages GCE."""
93
94 # API settings, used by BaseCloudApiClient.
95 API_NAME = "compute"
96 API_VERSION = "v1"
herbertxue308f7662018-05-18 03:25:58 +000097 SCOPE = " ".join([
98 "https://www.googleapis.com/auth/compute",
99 "https://www.googleapis.com/auth/devstorage.read_write"
100 ])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700101 # Default settings for gce operations
102 DEFAULT_INSTANCE_SCOPE = [
103 "https://www.googleapis.com/auth/devstorage.read_only",
104 "https://www.googleapis.com/auth/logging.write"
105 ]
Kevin Chengb5963882018-05-09 00:06:27 -0700106 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins
107 OPERATION_POLL_INTERVAL_SECS = 20
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700108 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
Fang Dengcef4b112017-03-02 11:20:17 -0800109 ACCESS_DENIED_CODE = 403
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700110
def __init__(self, acloud_config, oauth2_credentials):
    """Set up the client and remember the target project.

    Args:
        acloud_config: An AcloudConfig object; its "project" attribute is
                       the project every request of this client targets.
        oauth2_credentials: An oauth2client.OAuth2Credentials instance,
                            forwarded to the base class initializer.
    """
    super(ComputeClient, self).__init__(oauth2_credentials)
    self._project = acloud_config.project
120
def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
    """Get status of an operation.

    Args:
        operation: An Operation resource in the format of json. Only its
                   "name" field is read.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".

    Returns:
        Status of the operation, one of "DONE", "PENDING", "RUNNING".

    Raises:
        errors.DriverError: if the fetched operation reports an error, or
                            if operation_scope is not a known scope.
    """
    operation_name = operation["name"]
    if operation_scope == OperationScope.GLOBAL:
        api = self.service.globalOperations().get(
            project=self._project, operation=operation_name)
    elif operation_scope == OperationScope.ZONE:
        api = self.service.zoneOperations().get(
            project=self._project,
            operation=operation_name,
            zone=scope_name)
    elif operation_scope == OperationScope.REGION:
        api = self.service.regionOperations().get(
            project=self._project,
            operation=operation_name,
            region=scope_name)
    else:
        # An unknown scope used to fall through and crash on an unbound
        # local variable; fail with an explicit, catchable error instead.
        raise errors.DriverError(
            "Unknown operation scope: %s" % str(operation_scope))
    result = self.Execute(api)

    if result.get("error"):
        errors_list = result["error"]["errors"]
        raise errors.DriverError(
            "Get operation state failed, errors: %s" % str(errors_list))
    return result["status"]
160
def WaitOnOperation(self, operation, operation_scope, scope_name=None):
    """Block until an operation reports "DONE" or the timeout is hit.

    Polls self._GetOperationStatus through utils.PollAndWait, using the
    class-level OPERATION_TIMEOUT_SECS and OPERATION_POLL_INTERVAL_SECS.

    Args:
        operation: An Operation resource in the format of json.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".
    """
    timeout_secs = self.OPERATION_TIMEOUT_SECS
    timeout_message = (
        "Operation hits timeout, did not complete within %d secs." %
        timeout_secs)
    # The exception object is built up front and handed to PollAndWait as
    # its timeout_exception argument.
    utils.PollAndWait(
        func=self._GetOperationStatus,
        expected_return="DONE",
        timeout_exception=errors.GceOperationTimeoutError(timeout_message),
        timeout_secs=timeout_secs,
        sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
        operation=operation,
        operation_scope=operation_scope,
        scope_name=scope_name)
183
def GetProject(self):
    """Fetch the resource describing this client's project.

    Returns:
        A project resource in json.
    """
    request = self.service.projects().get(project=self._project)
    return self.Execute(request)
192
def GetDisk(self, disk_name, zone):
    """Fetch the resource describing one disk.

    Args:
        disk_name: A string, name of the disk.
        zone: String, name of zone.

    Returns:
        A disk resource in json.
        https://cloud.google.com/compute/docs/reference/latest/disks#resource
    """
    request = self.service.disks().get(
        project=self._project, zone=zone, disk=disk_name)
    return self.Execute(request)
207
def CheckDiskExists(self, disk_name, zone):
    """Tell whether a disk exists by trying to fetch it.

    Args:
        disk_name: A string, name of the disk.
        zone: String, name of zone.

    Returns:
        True if the disk could be fetched, False if the lookup raised
        errors.ResourceNotFoundError. Other errors propagate.
    """
    try:
        self.GetDisk(disk_name, zone)
    except errors.ResourceNotFoundError:
        found = False
    else:
        found = True
    logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
                 found)
    return found
226
def CreateDisk(self,
               disk_name,
               source_image,
               size_gb,
               zone,
               source_project=None,
               disk_type=PersistentDiskType.STANDARD):
    """Create a gce disk and wait for the creation to finish.

    If waiting fails with errors.DriverError, the half-created disk is
    deleted (when it exists) and the error is re-raised.

    Args:
        disk_name: String, name of the disk to create.
        source_image: String, name of the image. A falsy value means no
                      source image is passed to the API.
        size_gb: Integer, size in gb.
        zone: String, name of the zone, e.g. us-central1-b.
        source_project: String, required if the image is located in a
                        different project. Defaults to this client's
                        project.
        disk_type: String, a value from PersistentDiskType, STANDARD
                   for regular hard disk or SSD for solid state disk.
    """
    image_project = source_project or self._project
    if source_image:
        image_path = "projects/%s/global/images/%s" % (image_project,
                                                       source_image)
    else:
        image_path = None
    logger.info("Creating disk %s, size_gb: %d, source_image: %s",
                disk_name, size_gb, str(image_path))

    # The disk type is always resolved inside this client's own project.
    disk_type_path = "projects/%s/zones/%s/diskTypes/%s" % (
        self._project, zone, disk_type)
    disk_body = {
        "name": disk_name,
        "sizeGb": size_gb,
        "type": disk_type_path,
    }
    request = self.service.disks().insert(
        project=self._project,
        sourceImage=image_path,
        zone=zone,
        body=disk_body)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.DriverError:
        logger.error("Creating disk failed, cleaning up: %s", disk_name)
        if self.CheckDiskExists(disk_name, zone):
            self.DeleteDisk(disk_name, zone)
        raise
    logger.info("Disk %s has been created.", disk_name)
274
def DeleteDisk(self, disk_name, zone):
    """Delete a gce disk and wait for the deletion to finish.

    Args:
        disk_name: A string, name of disk.
        zone: A string, name of zone.
    """
    logger.info("Deleting disk %s", disk_name)
    request = self.service.disks().delete(
        project=self._project, zone=zone, disk=disk_name)
    pending_op = self.Execute(request)
    self.WaitOnOperation(
        operation=pending_op,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Deleted disk %s", disk_name)
291
def DeleteDisks(self, disk_names, zone):
    """Delete multiple disks.

    One delete request is built per distinct disk name and the whole set
    is handed to self._BatchExecuteAndWait.

    Args:
        disk_names: A list of disk names. Duplicates are collapsed.
        zone: A string, name of zone.

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of disks that have been deleted.
        failed: A list of names of disks that we fail to delete.
        error_msgs: A list of failure messages.
        Three empty lists are returned when disk_names is empty.
    """
    if not disk_names:
        # logger.warn is a deprecated alias; logger.warning is the
        # supported spelling on both Python 2 and Python 3.
        logger.warning("Nothing to delete. Arg disk_names is not provided.")
        return [], [], []
    # Batch send deletion requests.
    logger.info("Deleting disks: %s", disk_names)
    delete_requests = {
        disk_name: self.service.disks().delete(
            project=self._project, disk=disk_name, zone=zone)
        for disk_name in set(disk_names)
    }
    return self._BatchExecuteAndWait(
        delete_requests, OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700317
def ListDisks(self, zone, disk_filter=None):
    """List the disks of a zone.

    Delegates to self.ListWithMultiPages with the disks "list" API.

    Args:
        zone: A string, representing zone name. e.g. "us-central1-f"
        disk_filter: A string representing a filter in format of
                     FIELD_NAME COMPARISON_STRING LITERAL_STRING,
                     e.g. 'name ne example-instance' or
                     'name eq "example-instance-[0-9]+"'.

    Returns:
        A list of disks.
    """
    list_api = self.service.disks().list
    return self.ListWithMultiPages(
        api_resource=list_api,
        project=self._project,
        zone=zone,
        filter=disk_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700336
def CreateImage(self,
                image_name,
                source_uri=None,
                source_disk=None,
                labels=None):
    """Create a Gce image from either a storage URI or a disk.

    Does nothing when an image with this name already exists; note that
    this check happens before the arguments are validated. If waiting for
    the creation fails with errors.DriverError, the image is deleted (when
    it exists) and the error is re-raised.

    Args:
        image_name: String, name of image.
        source_uri: Full Google Cloud Storage URL where the disk image is
                    stored, e.g. "https://storage.googleapis.com/my-bucket/
                    avd-system-2243663.tar.gz"
        source_disk: String, this should be the disk's selfLink value
                     (including zone and project), rather than the
                     disk_name, e.g.
                     https://www.googleapis.com/compute/v1/projects/
                     google.com:android-builds-project/zones/
                     us-east1-d/disks/<disk_name>
        labels: Dict, will be added to the image's labels.

    Raises:
        errors.DriverError: For malformed request or response; in
                            particular when both or neither of source_uri
                            and source_disk are given.
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    if self.CheckImageExists(image_name):
        return

    # Exactly one of the two sources must be provided.
    if bool(source_uri) == bool(source_disk):
        raise errors.DriverError(
            "Creating image %s requires either source_uri %s or "
            "source_disk %s but not both" % (image_name, source_uri,
                                             source_disk))

    image_body = {"name": image_name}
    if source_uri:
        logger.info("Creating image %s, source_uri %s", image_name,
                    source_uri)
        image_body["rawDisk"] = {"source": source_uri}
    else:
        logger.info("Creating image %s, source_disk %s", image_name,
                    source_disk)
        image_body["sourceDisk"] = source_disk
    if labels is not None:
        image_body["labels"] = labels

    request = self.service.images().insert(
        project=self._project, body=image_body)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op, operation_scope=OperationScope.GLOBAL)
    except errors.DriverError:
        logger.error("Creating image failed, cleaning up: %s", image_name)
        if self.CheckImageExists(image_name):
            self.DeleteImage(image_name)
        raise
    logger.info("Image %s has been created.", image_name)
397
@utils.RetryOnException(_IsFingerPrintError,
                        _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
def SetImageLabels(self, image_name, new_labels):
    """Merge |new_labels| into an image's labels.

    Note: Decorator RetryOnException will retry the call for FingerPrint
    conflict (HTTP error code 412). The fingerprint is used to detect
    conflicts of GCE resource updates. The fingerprint is initially
    generated by Compute Engine and changes after every request to modify
    or update resources (e.g. GCE "image" resource has "fingerPrint" for
    "labels" updates).

    The image is fetched on every attempt, so each attempt sends the
    labelFingerprint it has just read.

    Args:
        image_name: A string, the image name.
        new_labels: Dict, will be added to the image's labels. Keys already
                    present on the image are overwritten.

    Returns:
        A GlobalOperation resource.
        https://cloud.google.com/compute/docs/reference/latest/globalOperations
    """
    image = self.GetImage(image_name)
    merged_labels = image.get("labels", {})
    merged_labels.update(new_labels)
    request_body = {
        "labels": merged_labels,
        "labelFingerprint": image["labelFingerprint"],
    }
    request = self.service.images().setLabels(
        project=self._project, resource=image_name, body=request_body)
    return self.Execute(request)
428
def CheckImageExists(self, image_name):
    """Tell whether an image exists by trying to fetch it.

    Args:
        image_name: A string, name of the image.

    Returns:
        True if the image could be fetched, False if the lookup raised
        errors.ResourceNotFoundError. Other errors propagate.
    """
    try:
        self.GetImage(image_name)
    except errors.ResourceNotFoundError:
        found = False
    else:
        found = True
    logger.debug("CheckImageExists: image_name: %s, result: %s",
                 image_name, found)
    return found
446
def GetImage(self, image_name, image_project=None):
    """Fetch the resource describing one image.

    Args:
        image_name: A string, name of the image.
        image_project: A string, project that owns the image. Falls back
                       to this client's project when not given.

    Returns:
        An image resource in json.
        https://cloud.google.com/compute/docs/reference/latest/images#resource
    """
    request = self.service.images().get(
        image=image_name, project=image_project or self._project)
    return self.Execute(request)
461
def DeleteImage(self, image_name):
    """Delete an image and wait for the deletion to finish.

    Args:
        image_name: A string, name of the image.
    """
    logger.info("Deleting image %s", image_name)
    request = self.service.images().delete(
        project=self._project, image=image_name)
    pending_op = self.Execute(request)
    self.WaitOnOperation(
        operation=pending_op, operation_scope=OperationScope.GLOBAL)
    logger.info("Deleted image %s", image_name)
475
def DeleteImages(self, image_names):
    """Delete multiple images.

    One delete request is built per distinct image name and the whole set
    is handed to self._BatchExecuteAndWait.

    Args:
        image_names: A list of image names. Duplicates are collapsed.

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of images that have been deleted.
        failed: A list of names of images that we fail to delete.
        error_msgs: A list of failure messages.
        Three empty lists are returned when image_names is empty.
    """
    if not image_names:
        return [], [], []
    # Batch send deletion requests.
    logger.info("Deleting images: %s", image_names)
    delete_requests = {
        name: self.service.images().delete(
            project=self._project, image=name)
        for name in set(image_names)
    }
    return self._BatchExecuteAndWait(delete_requests,
                                     OperationScope.GLOBAL)
499
def ListImages(self, image_filter=None, image_project=None):
    """List the images of a project.

    Delegates to self.ListWithMultiPages with the images "list" API.

    Args:
        image_filter: A string representing a filter in format of
                      FIELD_NAME COMPARISON_STRING LITERAL_STRING,
                      e.g. 'name ne example-image' or
                      'name eq "example-image-[0-9]+"'.
        image_project: String. If not provided, will list images from the
                       default project. Otherwise, will list images from
                       the given project, which can be any arbitrary
                       project where the account has read access
                       (i.e. has the role "roles/compute.imageUser").

    Read more about image sharing across project:
    https://cloud.google.com/compute/docs/images/sharing-images-across-projects

    Returns:
        A list of images.
    """
    list_api = self.service.images().list
    return self.ListWithMultiPages(
        api_resource=list_api,
        project=image_project or self._project,
        filter=image_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700524
def GetInstance(self, instance, zone):
    """Fetch the resource describing one instance.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        An instance resource in json.
        https://cloud.google.com/compute/docs/reference/latest/instances#resource
    """
    request = self.service.instances().get(
        project=self._project, zone=zone, instance=instance)
    return self.Execute(request)
539
def AttachAccelerator(self, instance, zone, accelerator_count,
                      accelerator_type):
    """Attach a GPU accelerator to the instance.

    Note: In order for this to succeed the following must hold:
    - The machine schedule must be set to "terminate" i.e:
      SetScheduling(self, instance, zone, on_host_maintenance="terminate")
      must have been called.
    - The machine is not starting or running. i.e.
      StopInstance(self, instance) must have been called.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        accelerator_count: The number accelerators to be attached to the
                           instance. A value of 0 will detach all
                           accelerators.
        accelerator_type: The type of accelerator to attach. e.g.
                          "nvidia-tesla-k80"

    Raises:
        errors.GceOperationTimeoutError: Waiting for the operation timed
                                         out; logged and re-raised.
    """
    accelerator = {
        "acceleratorType": self.GetAcceleratorUrl(accelerator_type, zone),
        "acceleratorCount": accelerator_count,
    }
    resources_body = {"guestAccelerators": [accelerator]}
    request = self.service.instances().setMachineResources(
        project=self._project,
        zone=zone,
        instance=instance,
        body=resources_body)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Attach instance failed: %s", instance)
        raise
    logger.info("%d x %s have been attached to instance %s.",
                accelerator_count, accelerator_type, instance)
Kevin Chengb5963882018-05-09 00:06:27 -0700580
def AttachDisk(self, instance, zone, **kwargs):
    """Attach the external disk to the instance and wait for completion.

    Nothing is returned; the method only waits for the attach operation.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        **kwargs: The attachDisk request body, sent as-is. See
                  "https://cloud.google.com/compute/docs/reference/latest/
                  instances/attachDisk" for detail.
                  {
                      "kind": "compute#attachedDisk",
                      "type": string,
                      "mode": string,
                      "source": string,
                      "deviceName": string,
                      "index": integer,
                      "boot": boolean,
                      "initializeParams": {
                          "diskName": string,
                          "sourceImage": string,
                          "diskSizeGb": long,
                          "diskType": string,
                          "sourceImageEncryptionKey": {
                              "rawKey": string,
                              "sha256": string
                          }
                      },
                      "autoDelete": boolean,
                      "licenses": [
                          string
                      ],
                      "interface": string,
                      "diskEncryptionKey": {
                          "rawKey": string,
                          "sha256": string
                      }
                  }

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().attachDisk(
        project=self._project, zone=zone, instance=instance, body=kwargs)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Attach instance failed: %s", instance)
        raise
    logger.info("Disk has been attached to instance %s.", instance)
638
def DetachDisk(self, instance, zone, disk_name):
    """Detach a disk from the instance and wait for completion.

    Nothing is returned; the method only waits for the detach operation.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        disk_name: A string, the name of the disk to detach. It is sent to
                   the API as the "deviceName" parameter.

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().detachDisk(
        project=self._project,
        zone=zone,
        instance=instance,
        deviceName=disk_name)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Detach instance failed: %s", instance)
        raise
    logger.info("Disk has been detached to instance %s.", instance)
669
def StartInstance(self, instance, zone):
    """Start |instance| in |zone| and wait for the operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().start(
        project=self._project, zone=zone, instance=instance)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Start instance failed: %s", instance)
        raise
    logger.info("Instance %s has been started.", instance)
692
def StartInstances(self, instances, zone):
    """Start |instances| in |zone|.

    Args:
        instances: A list of strings, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
              have been executed.
        failed: A list of string, representing the names of instances that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # Everything except the instance name is bound here; the batch helper
    # receives the partial and is expected to supply the rest.
    start_api = self.service.instances().start
    start_action = functools.partial(
        start_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, start_action)
711
def StopInstance(self, instance, zone):
    """Stop |instance| in |zone| and wait for the operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().stop(
        project=self._project, zone=zone, instance=instance)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Stop instance failed: %s", instance)
        raise
    logger.info("Instance %s has been terminated.", instance)
734
def StopInstances(self, instances, zone):
    """Stop |instances| in |zone|.

    Args:
        instances: A list of strings, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
              have been executed.
        failed: A list of string, representing the names of instances that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # Everything except the instance name is bound here; the batch helper
    # receives the partial and is expected to supply the rest.
    stop_api = self.service.instances().stop
    stop_action = functools.partial(
        stop_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, stop_action)
753
def SetScheduling(self,
                  instance,
                  zone,
                  automatic_restart=True,
                  on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    Sends a setScheduling request for one instance and waits for the
    resulting operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             "MIGRATE" or "TERMINATE" when there is
                             a maintenance event.

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    scheduling = {
        "automaticRestart": automatic_restart,
        "onHostMaintenance": on_host_maintenance,
    }
    request = self.service.instances().setScheduling(
        project=self._project,
        zone=zone,
        instance=instance,
        body=scheduling)
    pending_op = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending_op,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        logger.error("Set instance scheduling failed: %s", instance)
        raise
    logger.info(
        "Instance scheduling changed:\n"
        " automaticRestart: %s\n"
        " onHostMaintenance: %s\n",
        str(automatic_restart).lower(), on_host_maintenance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700796
def ListInstances(self, zone, instance_filter=None):
    """List the instances of a zone.

    Delegates to self.ListWithMultiPages with the instances "list" API.

    Args:
        zone: A string, representing zone name. e.g. "us-central1-f"
        instance_filter: A string representing a filter in format of
                         FIELD_NAME COMPARISON_STRING LITERAL_STRING,
                         e.g. 'name ne example-instance' or
                         'name eq "example-instance-[0-9]+"'.

    Returns:
        A list of instances.
    """
    list_api = self.service.instances().list
    return self.ListWithMultiPages(
        api_resource=list_api,
        project=self._project,
        zone=zone,
        filter=instance_filter)
815
def SetSchedulingInstances(self,
                           instances,
                           zone,
                           automatic_restart=True,
                           on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    Batch counterpart of SetScheduling: the same request body is applied
    to every instance through self._BatchExecuteOnInstances.

    Args:
        instances: A list of string, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             migrated or terminated when there is
                             a maintenance event.

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
              have been executed.
        failed: A list of string, representing the names of instances that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # The field name is case sensitive: it must be "onHostMaintenance",
    # exactly as in SetScheduling. It was previously misspelled with a
    # capital "O", so the maintenance policy was not sent under the
    # field name the API defines.
    body = {
        "automaticRestart": automatic_restart,
        "onHostMaintenance": on_host_maintenance,
    }
    action = functools.partial(
        self.service.instances().setScheduling,
        project=self._project,
        zone=zone,
        body=body)
    return self._BatchExecuteOnInstances(instances, zone, action)
855
856 def _BatchExecuteOnInstances(self, instances, zone, action):
857 """Batch processing operations requiring computing time.
858
859 Args:
860 instances: A list of instance names.
861 zone: A string, e.g. "us-central1-f".
862 action: partial func, all kwargs for this gcloud action has been
863 defined in the caller function (e.g. See "StartInstances")
864 except 'instance' which will be defined by iterating the
865 |instances|.
866
867 Returns:
868 A tuple, (done, failed, error_msgs)
869 done: A list of string, representing the names of instances that
870 have been executed.
871 failed: A list of string, representing the names of instances that
872 we failed to execute.
873 error_msgs: A list of string, representing the failure messages.
874 """
875 if not instances:
876 return [], [], []
877 # Batch send requests.
878 logger.info("Batch executing instances: %s", instances)
879 requests = {}
880 for instance_name in set(instances):
881 requests[instance_name] = action(instance=instance_name)
herbertxue308f7662018-05-18 03:25:58 +0000882 return self._BatchExecuteAndWait(
883 requests, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700884
def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
    """Batch processing requests and wait on the operation.

    Args:
        requests: A dictionary. The key is a string representing the resource
                  name. For example, an instance name, or an image name.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the resource names that have
              been executed.
        failed: A list of string, representing resource names that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # Each result is unpacked below as an (operation, error) pair keyed by
    # resource name.
    results = self.BatchExecute(requests)
    failed = []
    error_msgs = []
    # items() instead of the Python 2 only iteritems(), so this runs on both
    # Python 2 and Python 3.
    for resource_name, (_, error) in results.items():
        if error is not None:
            failed.append(resource_name)
            error_msgs.append(str(error))
    done = []
    # Wait for the executing operations to finish.
    logger.info("Waiting for executing operations")
    # Iterating the dict directly yields its keys (replaces iterkeys()).
    for resource_name in requests:
        operation, _ = results[resource_name]
        # Requests that produced no operation have nothing to wait on.
        if operation:
            try:
                self.WaitOnOperation(operation, operation_scope,
                                     scope_name)
                done.append(resource_name)
            except errors.DriverError as exc:
                failed.append(resource_name)
                error_msgs.append(str(exc))
    return done, failed, error_msgs
925
def ListZones(self):
    """List all zones in the project.

    Returns:
        The zones list response. For example:
        {
            "id": "projects/google.com%3Aandroid-build-staging/zones",
            "kind": "compute#zoneList",
            "selfLink": "https://www.googleapis.com/compute/v1/projects/"
                        "google.com:android-build-staging/zones",
            "items": [
                {
                    'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
                    'description': 'asia-east1-c',
                    'id': '2222',
                    'kind': 'compute#zone',
                    'name': 'asia-east1-c',
                    'region': 'https://www.googleapis.com/compute/v1/'
                              'projects/google.com:android-build-staging/'
                              'regions/asia-east1',
                    'selfLink': 'https://www.googleapis.com/compute/v1/'
                                'projects/google.com:android-build-staging/'
                                'zones/asia-east1-c',
                    'status': 'UP'
                }, ...]
        }
        See cloud cluster's api/mixer_zones.proto
    """
    return self.Execute(self.service.zones().list(project=self._project))
965
def ListRegions(self):
    """List all the regions for a project.

    Returns:
        A dictionary containing all the regions and additional data. See
        this link for the detailed response:
        https://cloud.google.com/compute/docs/reference/latest/regions/list.
        Example:
        {
            'items': [{
                'name': 'us-central1',
                'quotas': [
                    {'usage': 2.0, 'limit': 24.0, 'metric': 'CPUS'},
                    {'usage': 1.0, 'limit': 23.0,
                     'metric': 'IN_USE_ADDRESSES'},
                    {'usage': 209.0, 'limit': 10240.0,
                     'metric': 'DISKS_TOTAL_GB'},
                    {'usage': 1000.0, 'limit': 20000.0,
                     'metric': 'INSTANCES'},
                ]
            }, ...]
        }
    """
    return self.Execute(self.service.regions().list(project=self._project))
1000
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001001 def _GetNetworkArgs(self, network, zone):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001002 """Helper to generate network args that is used to create an instance.
1003
1004 Args:
1005 network: A string, e.g. "default".
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001006 zone: String, representing zone name, e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001007
1008 Returns:
1009 A dictionary representing network args.
1010 """
1011 return {
1012 "network": self.GetNetworkUrl(network),
Kevin Cheng1f582bc2018-10-02 10:37:02 -07001013 "subnetwork": self.GetSubnetworkUrl(network, zone),
herbertxue308f7662018-05-18 03:25:58 +00001014 "accessConfigs": [{
1015 "name": "External NAT",
1016 "type": "ONE_TO_ONE_NAT"
1017 }]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001018 }
1019
def _GetDiskArgs(self,
                 disk_name,
                 image_name,
                 image_project=None,
                 disk_size_gb=None):
    """Build the boot disk args used when creating an instance.

    Args:
        disk_name: A string
        image_name: A string
        image_project: A string
        disk_size_gb: An integer

    Returns:
        List holding a single dict of disk args: a deep copy of
        BASE_DISK_ARGS extended with "initializeParams" and, when
        |disk_size_gb| is truthy, "diskSizeGb".
    """
    # Deep copy so the shared module-level template is never modified.
    boot_disk = copy.deepcopy(BASE_DISK_ARGS)
    source_image = self.GetImage(image_name, image_project)["selfLink"]
    boot_disk["initializeParams"] = {
        "diskName": disk_name,
        "sourceImage": source_image,
    }
    # TODO: Remove this check once it's validated that we can either pass in
    # a None diskSizeGb or we find an appropriate default val.
    if disk_size_gb:
        boot_disk["diskSizeGb"] = disk_size_gb
    return [boot_disk]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001047
herbertxue308f7662018-05-18 03:25:58 +00001048 def _GetExtraDiskArgs(self, extra_disk_name, zone):
1049 """Get extra disk arg for given disk.
1050
1051 Args:
1052 extra_disk_name: String, name of the disk.
1053 zone: String, representing zone name, e.g. "us-central1-f"
1054
1055 Returns:
1056 A dictionary of disk args.
1057 """
1058 return [{
1059 "type": "PERSISTENT",
1060 "mode": "READ_WRITE",
1061 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1062 extra_disk_name),
1063 "autoDelete": True,
1064 "boot": False,
1065 "interface": "SCSI",
1066 "deviceName": extra_disk_name,
1067 }]
1068
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001069 # pylint: disable=too-many-locals
def CreateInstance(self,
                   instance,
                   image_name,
                   machine_type,
                   metadata,
                   network,
                   zone,
                   disk_args=None,
                   image_project=None,
                   gpu=None,
                   extra_disk_name=None):
    """Create a gce instance with a gce image.

    Builds the instances.insert request body, executes it and waits for
    the resulting zone operation to finish.

    Args:
        instance: String, instance name.
        image_name: String, source image used to create this disk.
        machine_type: String, representing machine_type,
                      e.g. "n1-standard-1"
        metadata: Dict, maps a metadata name to its value.
        network: String, representing network name, e.g. "default"
        zone: String, representing zone name, e.g. "us-central1-f"
        disk_args: A list of disk args (dicts), see _GetDiskArgs for
                   example. If None (or empty), a disk is created from the
                   given image. The list passed in is not modified.
        image_project: String, name of the project where the image
                       belongs. Assume the default project if None.
        gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
             None no gpus will be attached. For more details see:
             https://cloud.google.com/compute/docs/gpus/add-gpus
        extra_disk_name: String, the name of the extra disk to attach.
    """
    # Work on a copy of caller-supplied disk args: the extra disk is
    # appended below and must not leak into the caller's list.
    if disk_args:
        disk_args = list(disk_args)
    else:
        disk_args = self._GetDiskArgs(instance, image_name, image_project)
    if extra_disk_name:
        disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
    body = {
        "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
        "name": instance,
        "networkInterfaces": [self._GetNetworkArgs(network, zone)],
        "disks": disk_args,
        "serviceAccounts": [{
            "email": "default",
            "scopes": self.DEFAULT_INSTANCE_SCOPE
        }],
    }

    if gpu:
        body["guestAccelerators"] = [{
            "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
            "acceleratorCount": 1
        }]
        # Instances with GPUs cannot live migrate because they are assigned
        # to specific hardware devices.
        body["scheduling"] = {"onHostMaintenance": "terminate"}
    if metadata:
        # items() instead of the Python 2 only iteritems(), so this runs on
        # both Python 2 and Python 3.
        metadata_list = [{
            "key": key,
            "value": val
        } for key, val in metadata.items()]
        body["metadata"] = {"items": metadata_list}
    logger.info("Creating instance: project %s, zone %s, body:%s",
                self._project, zone, body)
    api = self.service.instances().insert(
        project=self._project, zone=zone, body=body)
    operation = self.Execute(api)
    self.WaitOnOperation(
        operation, operation_scope=OperationScope.ZONE, scope_name=zone)
    logger.info("Instance %s has been created.", instance)
1138
def DeleteInstance(self, instance, zone):
    """Delete a single gce instance and wait for the deletion to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f"
    """
    logger.info("Deleting instance: %s", instance)
    delete_request = self.service.instances().delete(
        instance=instance, zone=zone, project=self._project)
    # Block until the zone-scoped delete operation has completed.
    self.WaitOnOperation(
        self.Execute(delete_request),
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Deleted instance: %s", instance)
1153
def DeleteInstances(self, instances, zone):
    """Delete several instances in one batch.

    Args:
        instances: A list of instance names.
        zone: A string, e.g. "us-central1-f".

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of instances that have been deleted.
        failed: A list of names of instances that we fail to delete.
        error_msgs: A list of failure messages.
    """
    delete_api = self.service.instances().delete
    # 'instance' stays unbound; the batch helper supplies it per name.
    delete_one = functools.partial(
        delete_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, delete_one)
1170
def ResetInstance(self, instance, zone):
    """Reset a gce instance and wait for the reset to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f".
    """
    logger.info("Resetting instance: %s", instance)
    reset_request = self.service.instances().reset(
        instance=instance, zone=zone, project=self._project)
    # Block until the zone-scoped reset operation has completed.
    self.WaitOnOperation(
        self.Execute(reset_request),
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Instance has been reset: %s", instance)
1185
def GetMachineType(self, machine_type, zone):
    """Fetch the machine type resource for |machine_type| in |zone|.

    Args:
        machine_type: A string, name of the machine type.
        zone: A string, e.g. "us-central1-f"

    Returns:
        A machine type resource in json.
        https://cloud.google.com/compute/docs/reference/latest/
        machineTypes#resource
    """
    machine_types = self.service.machineTypes()
    return self.Execute(
        machine_types.get(
            project=self._project, zone=zone, machineType=machine_type))
1201
def GetAcceleratorUrl(self, accelerator_type, zone):
    """Get URL for a given type of accelerator.

    Args:
        accelerator_type: A string, representing the accelerator, e.g
                          "nvidia-tesla-k80"
        zone: A string representing a zone, e.g. "us-west1-b"

    Returns:
        The "selfLink" of the accelerator type resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/zones/
        us-west1-b/acceleratorTypes/nvidia-tesla-k80
    """
    request = self.service.acceleratorTypes().get(
        acceleratorType=accelerator_type, zone=zone, project=self._project)
    return self.Execute(request)["selfLink"]
1219
def GetNetworkUrl(self, network):
    """Get URL for a given network.

    Args:
        network: A string, representing network name, e.g "default"

    Returns:
        The "selfLink" of the network resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/
        global/networks/default
    """
    request = self.service.networks().get(
        network=network, project=self._project)
    return self.Execute(request)["selfLink"]
1235
def GetSubnetworkUrl(self, network, zone):
    """Get the subnetwork URL for a given network and zone.

    Return the subnetwork for the network in the specified region that the
    specified zone resides in. If there is no subnetwork for the specified
    zone, raise an exception.

    Args:
        network: A string, representing network name, e.g "default"
        zone: String, representing zone name, e.g. "us-central1-f"

    Returns:
        A URL that points to the subnetwork resource, e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/
        regions/us-central1/subnetworks/default

    Raises:
        errors.NoSubnetwork: When no subnetwork exists for the zone
                             specified.
    """
    api = self.service.networks().get(
        project=self._project, network=network)
    result = self.Execute(api)
    # The region is the zone name minus its trailing "-<letter>" suffix.
    region = zone.rsplit("-", 1)[0]
    # Match the whole "/regions/<region>/" path segment. A bare substring
    # test would let "europe-west1" match ".../regions/europe-west10/...".
    region_segment = "/regions/%s/" % region
    # A response without a "subnetworks" field is treated as having none, so
    # the documented NoSubnetwork is raised instead of a KeyError.
    for subnetwork in result.get("subnetworks", []):
        if region_segment in subnetwork:
            return subnetwork
    raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
                              (network, region))
1265
def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
    """Compare the size of two machine types.

    Each metric in self.MACHINE_SIZE_METRICS is compared in order.

    Args:
        machine_type_1: A string representing a machine type, e.g. n1-standard-1
        machine_type_2: A string representing a machine type, e.g. n1-standard-1
        zone: A string representing a zone, e.g. "us-central1-f"

    Returns:
        -1 if any metric of machine size of the first type is smaller than
            the second type.
        0 if all metrics of machine size are equal.
        1 if at least one metric of machine size of the first type is
            greater than the second type and all metrics of first type are
            greater or equal to the second type.

    Raises:
        errors.DriverError: For malformed response.
    """
    first = self.GetMachineType(machine_type_1, zone)
    second = self.GetMachineType(machine_type_2, zone)
    first_is_larger = False
    for metric in self.MACHINE_SIZE_METRICS:
        if not (metric in first and metric in second):
            raise errors.DriverError(
                "Malformed machine size record: Can't find '%s' in %s or %s"
                % (metric, first, second))
        delta = first[metric] - second[metric]
        # A single smaller metric settles the comparison immediately.
        if delta < 0:
            return -1
        if delta > 0:
            first_is_larger = True
    return 1 if first_is_larger else 0
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001299
def GetSerialPortOutput(self, instance, zone, port=1):
    """Read the serial port output of an instance.

    Args:
        instance: string, instance name.
        zone: string, zone name.
        port: int, which COM port to read from, 1-4, default to 1.

    Returns:
        String, contents of the output.

    Raises:
        errors.DriverError: For malformed response.
    """
    request = self.service.instances().getSerialPortOutput(
        project=self._project, zone=zone, instance=instance, port=port)
    response = self.Execute(request)
    if "contents" in response:
        return response["contents"]
    raise errors.DriverError(
        "Malformed response for GetSerialPortOutput: %s" % response)
1321
def GetInstanceNamesByIPs(self, ips, zone):
    """Map each of the given IPs to the name of the instance that owns it.

    This function will go through all instances, which
    could be slow if there are too many instances. However, currently
    GCE doesn't support search for instance by IP.

    Args:
        ips: A set of IPs.
        zone: String, name of the zone.

    Returns:
        A dictionary where key is IP and value is instance name or None
        if instance is not found for the given IP.
    """
    # Every requested IP starts out unresolved.
    names_by_ip = {ip: None for ip in ips}
    for resource in self.ListInstances(zone):
        try:
            access_config = resource["networkInterfaces"][0]["accessConfigs"][0]
            nat_ip = access_config["natIP"]
            if nat_ip in ips:
                names_by_ip[nat_ip] = resource["name"]
        except (IndexError, KeyError) as err:
            # An instance missing one of the looked-up fields is logged and
            # skipped rather than aborting the scan.
            logger.error("Could not get instance names by ips: %s", str(err))
    return names_by_ip
1347
def GetInstanceIP(self, instance, zone):
    """Look up the internal and external IP of an instance by name.

    Both addresses are read from the instance's first network interface;
    the external one comes from that interface's first access config.

    Args:
        instance: String, representing instance name.
        zone: String, name of the zone.

    Returns:
        NamedTuple of (internal, external) IP of the instance.
    """
    resource = self.GetInstance(instance, zone)
    primary_nic = resource["networkInterfaces"][0]
    return IP(
        internal=primary_nic["networkIP"],
        external=primary_nic["accessConfigs"][0]["natIP"])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001362
def SetCommonInstanceMetadata(self, body):
    """Set project-wide metadata and wait for the operation to finish.

    Args:
        body: Metadata body.
              metadata is in the following format.
              {
                "kind": "compute#metadata",
                "fingerprint": "a-23icsyx4E=",
                "items": [
                  {
                    "key": "google-compute-default-region",
                    "value": "us-central1"
                  }, ...
                ]
              }
    """
    request = self.service.projects().setCommonInstanceMetadata(
        body=body, project=self._project)
    # This is a project-level change, so the operation is waited on with
    # global scope.
    self.WaitOnOperation(
        self.Execute(request), operation_scope=OperationScope.GLOBAL)
1384
def AddSshRsa(self, user, ssh_rsa_path):
    """Add the public rsa key to the project's metadata.

    Compute engine instances that are created after will
    by default contain the key.

    The key is appended as a "<user>:<key>" line to the "sshKeys" item of
    the project's common instance metadata; the item is created when it
    does not exist yet.

    Args:
        user: the name of the user which the key belongs to.
        ssh_rsa_path: The absolute path to public rsa key.

    Raises:
        errors.DriverError: If |ssh_rsa_path| does not exist.
    """
    if not os.path.exists(ssh_rsa_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % ssh_rsa_path)

    logger.info("Adding ssh rsa key from %s to project %s for user: %s",
                ssh_rsa_path, self._project, user)
    project = self.GetProject()
    with open(ssh_rsa_path) as key_file:
        rsa = key_file.read()
    if rsa:
        rsa = rsa.strip()
    utils.VerifyRsaPubKey(rsa)

    metadata = project["commonInstanceMetadata"]
    items = metadata.setdefault("items", [])
    # Reuse the existing "sshKeys" item when there is one, otherwise add it.
    sshkey_item = next(
        (item for item in items if item["key"] == "sshKeys"), None)
    if sshkey_item is None:
        sshkey_item = {"key": "sshKeys", "value": ""}
        items.append(sshkey_item)

    entry = "%s:%s" % (user, rsa)
    logger.debug("New RSA entry: %s", entry)
    existing_keys = sshkey_item["value"].strip()
    # The outer strip() drops the leading newline when no key existed yet.
    sshkey_item["value"] = "\n".join([existing_keys, entry]).strip()
    self.SetCommonInstanceMetadata(metadata)
Fang Dengcef4b112017-03-02 11:20:17 -08001420
def CheckAccess(self):
    """Check if the user has read access to the cloud project.

    Access is probed by listing the project's zones.

    Returns:
        True if the user has at least read access to the project.
        False otherwise.

    Raises:
        errors.HttpError if other unexpected error happens when
        accessing the project.
    """
    probe = self.service.zones().list(project=self._project)
    # Retry on a copy of the usual codes minus "access denied", so a denial
    # surfaces immediately and the shared list stays untouched.
    codes_to_retry = copy.copy(self.RETRY_HTTP_CODES)
    codes_to_retry.remove(self.ACCESS_DENIED_CODE)
    try:
        self.Execute(probe, retry_http_codes=codes_to_retry)
    except errors.HttpError as err:
        if err.code != self.ACCESS_DENIED_CODE:
            raise
        return False
    else:
        return True