blob: c645edc2fd65b1e6770abfa24a277def74b18dd2 [file] [log] [blame]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070016"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
Kevin Cheng5c124ec2018-05-16 13:28:51 -070028# pylint: disable=too-many-lines
Kevin Cheng86d43c72018-08-30 10:59:14 -070029import collections
Fang Dengcef4b112017-03-02 11:20:17 -080030import copy
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070031import functools
32import logging
33import os
34
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070035from acloud.internal.lib import base_cloud_client
36from acloud.internal.lib import utils
37from acloud.public import errors
38
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Retry budget used when a label update hits a fingerprint conflict.
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10

# Baseline description of a persistent, read-write boot disk that is deleted
# together with its instance; "initializeParams" starts out empty.
BASE_DISK_ARGS = dict(
    type="PERSISTENT",
    boot=True,
    mode="READ_WRITE",
    autoDelete=True,
    initializeParams={},
)

# Pair of addresses: IP.external and IP.internal.
IP = collections.namedtuple("IP", "external internal")
52
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070053
class OperationScope(object):
    """Enum-like holder for the scope an operation belongs to.

    The value tells which operations API (zone, region or global) has to be
    queried for the operation.
    """
    ZONE, REGION, GLOBAL = "zone", "region", "global"
59
60
class PersistentDiskType(object):
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    STANDARD, SSD = "pd-standard", "pd-ssd"
69
70
class ImageStatus(object):
    """Enum-like holder for the states an image can be in."""
    PENDING, READY, FAILED = "PENDING", "READY", "FAILED"
76
77
def _IsFingerPrintError(exc):
    """Tell whether an exception is an HTTP 412 error.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only when exc is an errors.HttpError whose code is 412
        ("Precondition Failed").
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
88
89
Kevin Cheng5c124ec2018-05-16 13:28:51 -070090# pylint: disable=too-many-public-methods
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070091class ComputeClient(base_cloud_client.BaseCloudApiClient):
92 """Client that manages GCE."""
93
94 # API settings, used by BaseCloudApiClient.
95 API_NAME = "compute"
96 API_VERSION = "v1"
herbertxue308f7662018-05-18 03:25:58 +000097 SCOPE = " ".join([
98 "https://www.googleapis.com/auth/compute",
99 "https://www.googleapis.com/auth/devstorage.read_write"
100 ])
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700101 # Default settings for gce operations
102 DEFAULT_INSTANCE_SCOPE = [
103 "https://www.googleapis.com/auth/devstorage.read_only",
104 "https://www.googleapis.com/auth/logging.write"
105 ]
Kevin Chengb5963882018-05-09 00:06:27 -0700106 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins
107 OPERATION_POLL_INTERVAL_SECS = 20
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700108 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
Fang Dengcef4b112017-03-02 11:20:17 -0800109 ACCESS_DENIED_CODE = 403
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700110
def __init__(self, acloud_config, oauth2_credentials):
    """Set up the client for the project named in the config.

    Args:
        acloud_config: An AcloudConfig object; its "project" attribute is
                       stored as the project every request targets.
        oauth2_credentials: An oauth2client.OAuth2Credentials instance,
                            forwarded to the base class initializer.
    """
    parent = super(ComputeClient, self)
    parent.__init__(oauth2_credentials)
    self._project = acloud_config.project
120
def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
    """Get status of an operation.

    Args:
        operation: An Operation resource in the format of json.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".

    Returns:
        Status of the operation, one of "DONE", "PENDING", "RUNNING".

    Raises:
        errors.DriverError: if operation_scope is not a known scope, or if
                            the operation reports an error.
    """
    operation_name = operation["name"]
    if operation_scope == OperationScope.GLOBAL:
        api = self.service.globalOperations().get(
            project=self._project, operation=operation_name)
    elif operation_scope == OperationScope.ZONE:
        api = self.service.zoneOperations().get(
            project=self._project,
            operation=operation_name,
            zone=scope_name)
    elif operation_scope == OperationScope.REGION:
        api = self.service.regionOperations().get(
            project=self._project,
            operation=operation_name,
            region=scope_name)
    else:
        # Without this branch an unknown scope would surface later as an
        # UnboundLocalError instead of a meaningful driver error.
        raise errors.DriverError(
            "Unknown operation scope: %s" % str(operation_scope))
    result = self.Execute(api)

    if result.get("error"):
        errors_list = result["error"]["errors"]
        raise errors.DriverError(
            "Get operation state failed, errors: %s" % str(errors_list))
    return result["status"]
160
def WaitOnOperation(self, operation, operation_scope, scope_name=None):
    """Poll an operation until its status is "DONE".

    Polling is delegated to utils.PollAndWait, which repeatedly calls
    self._GetOperationStatus with the arguments given here.

    Args:
        operation: An Operation resource in the format of json.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".
    """
    timeout_message = (
        "Operation hits timeout, did not complete within %d secs." %
        self.OPERATION_TIMEOUT_SECS)
    poll_settings = {
        "func": self._GetOperationStatus,
        "expected_return": "DONE",
        # Handed to PollAndWait; presumably raised once the timeout expires.
        "timeout_exception": errors.GceOperationTimeoutError(timeout_message),
        "timeout_secs": self.OPERATION_TIMEOUT_SECS,
        "sleep_interval_secs": self.OPERATION_POLL_INTERVAL_SECS,
    }
    utils.PollAndWait(
        operation=operation,
        operation_scope=operation_scope,
        scope_name=scope_name,
        **poll_settings)
183
def GetProject(self):
    """Fetch the resource describing the configured project.

    Returns:
        A project resource in json.
    """
    request = self.service.projects().get(project=self._project)
    response = self.Execute(request)
    return response
192
def GetDisk(self, disk_name, zone):
    """Fetch the resource describing one disk.

    Args:
        disk_name: A string, name of the disk.
        zone: String, name of zone.

    Returns:
        A disk resource in json.
        https://cloud.google.com/compute/docs/reference/latest/disks#resource
    """
    lookup = {"project": self._project, "zone": zone, "disk": disk_name}
    request = self.service.disks().get(**lookup)
    return self.Execute(request)
207
def CheckDiskExists(self, disk_name, zone):
    """Report whether a disk can be fetched from the given zone.

    Args:
        disk_name: A string, name of the disk.
        zone: String, name of zone.

    Returns:
        True if disk exists, otherwise False (i.e. when GetDisk raises
        errors.ResourceNotFoundError).
    """
    try:
        self.GetDisk(disk_name, zone)
    except errors.ResourceNotFoundError:
        found = False
    else:
        found = True
    logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
                 found)
    return found
226
def CreateDisk(self,
               disk_name,
               source_image,
               size_gb,
               zone,
               source_project=None,
               disk_type=PersistentDiskType.STANDARD):
    """Create a gce disk and wait for the creation to finish.

    If waiting fails with errors.DriverError, a disk that got created anyway
    is deleted again before the error is re-raised.

    Args:
        disk_name: String, name of the disk.
        source_image: String, name of the image. A falsy value creates the
                      disk without a source image.
        size_gb: Integer, size in gb.
        zone: String, name of the zone, e.g. us-central1-b.
        source_project: String, required if the image is located in a
                        different project. Defaults to the client's project.
        disk_type: String, a value from PersistentDiskType, STANDARD
                   for regular hard disk or SSD for solid state disk.
    """
    image_project = source_project or self._project
    if source_image:
        image_path = "projects/%s/global/images/%s" % (image_project,
                                                       source_image)
    else:
        image_path = None
    logger.info("Creating disk %s, size_gb: %d, source_image: %s",
                disk_name, size_gb, str(image_path))
    disk_type_path = "projects/%s/zones/%s/diskTypes/%s" % (
        self._project, zone, disk_type)
    request_body = {
        "name": disk_name,
        "sizeGb": size_gb,
        "type": disk_type_path,
    }
    request = self.service.disks().insert(
        project=self._project,
        sourceImage=image_path,
        zone=zone,
        body=request_body)
    pending = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.DriverError:
        # Do not leave a half-created disk behind.
        logger.error("Creating disk failed, cleaning up: %s", disk_name)
        if self.CheckDiskExists(disk_name, zone):
            self.DeleteDisk(disk_name, zone)
        raise
    logger.info("Disk %s has been created.", disk_name)
274
def DeleteDisk(self, disk_name, zone):
    """Delete a gce disk and wait for the deletion to finish.

    Args:
        disk_name: A string, name of disk.
        zone: A string, name of zone.
    """
    logger.info("Deleting disk %s", disk_name)
    request = self.service.disks().delete(
        project=self._project, zone=zone, disk=disk_name)
    pending = self.Execute(request)
    wait_args = {
        "operation": pending,
        "operation_scope": OperationScope.ZONE,
        "scope_name": zone,
    }
    self.WaitOnOperation(**wait_args)
    logger.info("Deleted disk %s", disk_name)
291
def DeleteDisks(self, disk_names, zone):
    """Delete multiple disks.

    Duplicate names are collapsed, so each disk gets one delete request.

    Args:
        disk_names: A list of disk names.
        zone: A string, name of zone.

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of disks that have been deleted.
        failed: A list of names of disks that we fail to delete.
        error_msgs: A list of failure messages.
        All three lists are empty when disk_names is empty.
    """
    if not disk_names:
        # logger.warn is a deprecated alias; use logger.warning.
        logger.warning("Nothing to delete. Arg disk_names is not provided.")
        return [], [], []
    # Batch send deletion requests.
    logger.info("Deleting disks: %s", disk_names)
    delete_requests = {
        disk_name: self.service.disks().delete(
            project=self._project, disk=disk_name, zone=zone)
        for disk_name in set(disk_names)
    }
    return self._BatchExecuteAndWait(
        delete_requests, OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700317
def ListDisks(self, zone, disk_filter=None):
    """List the disks of a zone, following result pages.

    Args:
        zone: A string, representing zone name. e.g. "us-central1-f"
        disk_filter: A string representing a filter in format of
                     FIELD_NAME COMPARISON_STRING LITERAL_STRING
                     e.g. "name ne example-instance"
                     e.g. "name eq "example-instance-[0-9]+""

    Returns:
        A list of disks.
    """
    list_args = {
        "api_resource": self.service.disks().list,
        "project": self._project,
        "zone": zone,
        "filter": disk_filter,
    }
    return self.ListWithMultiPages(**list_args)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700336
def CreateImage(self,
                image_name,
                source_uri=None,
                source_disk=None,
                labels=None):
    """Create a Gce image from either a storage URI or a disk.

    Does nothing when an image named image_name already exists. If waiting
    for the creation fails with errors.DriverError, an image that got created
    anyway is deleted again before the error is re-raised.

    Args:
        image_name: String, name of image
        source_uri: Full Google Cloud Storage URL where the disk image is
                    stored. e.g. "https://storage.googleapis.com/my-bucket/
                    avd-system-2243663.tar.gz"
        source_disk: String, this should be the disk's selfLink value
                     (including zone and project), rather than the disk_name
                     e.g. https://www.googleapis.com/compute/v1/projects/
                     google.com:android-builds-project/zones/
                     us-east1-d/disks/<disk_name>
        labels: Dict, will be added to the image's labels.

    Raises:
        errors.DriverError: For malformed request or response, including
                            when both or neither of source_uri and
                            source_disk are given.
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    if self.CheckImageExists(image_name):
        return

    # Exactly one of the two sources must be supplied.
    if bool(source_uri) == bool(source_disk):
        raise errors.DriverError(
            "Creating image %s requires either source_uri %s or "
            "source_disk %s but not both" % (image_name, source_uri,
                                             source_disk))

    request_body = {"name": image_name}
    if source_uri:
        logger.info("Creating image %s, source_uri %s", image_name,
                    source_uri)
        request_body["rawDisk"] = {"source": source_uri}
    else:
        logger.info("Creating image %s, source_disk %s", image_name,
                    source_disk)
        request_body["sourceDisk"] = source_disk
    if labels is not None:
        request_body["labels"] = labels

    request = self.service.images().insert(
        project=self._project, body=request_body)
    pending = self.Execute(request)
    try:
        self.WaitOnOperation(
            operation=pending, operation_scope=OperationScope.GLOBAL)
    except errors.DriverError:
        # Do not leave a half-created image behind.
        logger.error("Creating image failed, cleaning up: %s", image_name)
        if self.CheckImageExists(image_name):
            self.DeleteImage(image_name)
        raise
    logger.info("Image %s has been created.", image_name)
397
@utils.RetryOnException(_IsFingerPrintError,
                        _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
def SetImageLabels(self, image_name, new_labels):
    """Merge new labels into an image's labels.

    Note: The RetryOnException decorator retries the call on a fingerprint
    conflict (HTTP error code 412). The fingerprint is used to detect
    conflicting updates of a GCE resource: it is generated by Compute Engine
    and changes after every request that modifies the resource (e.g. the
    "image" resource has a "labelFingerprint" for "labels" updates). Each
    attempt re-reads the image, so it picks up the current fingerprint.

    Args:
        image_name: A string, the image name.
        new_labels: Dict, will be added to the image's labels.

    Returns:
        A GlobalOperation resource.
        https://cloud.google.com/compute/docs/reference/latest/globalOperations
    """
    image = self.GetImage(image_name)
    merged_labels = image.get("labels", {})
    merged_labels.update(new_labels)
    request_body = {
        "labels": merged_labels,
        "labelFingerprint": image["labelFingerprint"],
    }
    request = self.service.images().setLabels(
        project=self._project, resource=image_name, body=request_body)
    return self.Execute(request)
428
def CheckImageExists(self, image_name):
    """Report whether an image can be fetched from the client's project.

    Args:
        image_name: A string, name of the image.

    Returns:
        True if image exists, otherwise False (i.e. when GetImage raises
        errors.ResourceNotFoundError).
    """
    try:
        self.GetImage(image_name)
    except errors.ResourceNotFoundError:
        found = False
    else:
        found = True
    logger.debug("CheckImageExists: image_name: %s, result: %s",
                 image_name, found)
    return found
446
def GetImage(self, image_name, image_project=None):
    """Fetch the resource describing one image.

    Args:
        image_name: A string, name of the image.
        image_project: A string, project holding the image. Falls back to
                       the client's own project when not given.

    Returns:
        An image resource in json.
        https://cloud.google.com/compute/docs/reference/latest/images#resource
    """
    project = image_project or self._project
    request = self.service.images().get(project=project, image=image_name)
    return self.Execute(request)
461
def DeleteImage(self, image_name):
    """Delete an image and wait for the deletion to finish.

    Args:
        image_name: A string, name of the image.
    """
    logger.info("Deleting image %s", image_name)
    request = self.service.images().delete(
        project=self._project, image=image_name)
    pending = self.Execute(request)
    wait_args = {
        "operation": pending,
        "operation_scope": OperationScope.GLOBAL,
    }
    self.WaitOnOperation(**wait_args)
    logger.info("Deleted image %s", image_name)
475
def DeleteImages(self, image_names):
    """Delete multiple images.

    Duplicate names are collapsed, so each image gets one delete request.

    Args:
        image_names: A list of image names.

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of images that have been deleted.
        failed: A list of names of images that we fail to delete.
        error_msgs: A list of failure messages.
        All three lists are empty when image_names is empty.
    """
    if not image_names:
        return [], [], []
    # Batch send deletion requests.
    logger.info("Deleting images: %s", image_names)
    requests_by_name = {
        name: self.service.images().delete(
            project=self._project, image=name)
        for name in set(image_names)
    }
    return self._BatchExecuteAndWait(requests_by_name,
                                     OperationScope.GLOBAL)
499
def ListImages(self, image_filter=None, image_project=None):
    """List the images of a project, following result pages.

    Args:
        image_filter: A string representing a filter in format of
                      FIELD_NAME COMPARISON_STRING LITERAL_STRING
                      e.g. "name ne example-image"
                      e.g. "name eq "example-image-[0-9]+""
        image_project: String. If not provided, will list images from the
                       default project. Otherwise, will list images from the
                       given project, which can be any arbitrary project
                       where the account has read access
                       (i.e. has the role "roles/compute.imageUser")

    Read more about image sharing across project:
    https://cloud.google.com/compute/docs/images/sharing-images-across-projects

    Returns:
        A list of images.
    """
    list_args = {
        "api_resource": self.service.images().list,
        "project": image_project or self._project,
        "filter": image_filter,
    }
    return self.ListWithMultiPages(**list_args)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700524
def GetInstance(self, instance, zone):
    """Fetch the resource describing one instance.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        An instance resource in json.
        https://cloud.google.com/compute/docs/reference/latest/instances#resource
    """
    lookup = {"project": self._project, "zone": zone, "instance": instance}
    request = self.service.instances().get(**lookup)
    return self.Execute(request)
539
def AttachAccelerator(self, instance, zone, accelerator_count,
                      accelerator_type):
    """Attach a GPU accelerator to the instance.

    Note: In order for this to succeed the following must hold:
    - The machine schedule must be set to "TERMINATE" i.e:
      SetScheduling(self, instance, zone, on_host_maintenance="TERMINATE")
      must have been called.
    - The machine is not starting or running. i.e.
      StopInstance(self, instance, zone) must have been called.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        accelerator_count: The number accelerators to be attached to the
                           instance. a value of 0 will detach all
                           accelerators.
        accelerator_type: The type of accelerator to attach. e.g.
                          "nvidia-tesla-k80"

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    body = {
        "guestAccelerators": [{
            "acceleratorType":
                self.GetAcceleratorUrl(accelerator_type, zone),
            "acceleratorCount":
                accelerator_count
        }]
    }
    api = self.service.instances().setMachineResources(
        project=self._project, zone=zone, instance=instance, body=body)
    operation = self.Execute(api)
    try:
        self.WaitOnOperation(
            operation=operation,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        # Name the action that actually failed (accelerator, not instance).
        logger.error("Attach accelerator failed: %s", instance)
        raise
    logger.info("%d x %s have been attached to instance %s.",
                accelerator_count, accelerator_type, instance)
Kevin Chengb5963882018-05-09 00:06:27 -0700580
def AttachDisk(self, instance, zone, **kwargs):
    """Attach the external disk to the instance.

    Sends the attachDisk request and waits for the operation to finish.
    Nothing is returned.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        **kwargs: The attachDisk request body. See "https://cloud.google.com/
                  compute/docs/reference/latest/instances/attachDisk" for
                  detail.
                  {
                    "kind": "compute#attachedDisk",
                    "type": string,
                    "mode": string,
                    "source": string,
                    "deviceName": string,
                    "index": integer,
                    "boot": boolean,
                    "initializeParams": {
                      "diskName": string,
                      "sourceImage": string,
                      "diskSizeGb": long,
                      "diskType": string,
                      "sourceImageEncryptionKey": {
                        "rawKey": string,
                        "sha256": string
                      }
                    },
                    "autoDelete": boolean,
                    "licenses": [
                      string
                    ],
                    "interface": string,
                    "diskEncryptionKey": {
                      "rawKey": string,
                      "sha256": string
                    }
                  }

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    api = self.service.instances().attachDisk(
        project=self._project, zone=zone, instance=instance, body=kwargs)
    operation = self.Execute(api)
    try:
        self.WaitOnOperation(
            operation=operation,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        # Name the action that actually failed (disk, not instance).
        logger.error("Attach disk failed: %s", instance)
        raise
    logger.info("Disk has been attached to instance %s.", instance)
638
def DetachDisk(self, instance, zone, disk_name):
    """Detach a disk from the instance.

    Sends the detachDisk request and waits for the operation to finish.
    Nothing is returned.

    Args:
        instance: A string, representing instance name.
        zone: String, name of zone.
        disk_name: A string, the name of the disk to detach. It is sent as
                   the "deviceName" parameter of the detachDisk request.

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    api = self.service.instances().detachDisk(
        project=self._project,
        zone=zone,
        instance=instance,
        deviceName=disk_name)
    operation = self.Execute(api)
    try:
        self.WaitOnOperation(
            operation=operation,
            operation_scope=OperationScope.ZONE,
            scope_name=zone)
    except errors.GceOperationTimeoutError:
        # Name the action that actually failed (disk, not instance).
        logger.error("Detach disk failed: %s", instance)
        raise
    logger.info("Disk has been detached from instance %s.", instance)
669
def StartInstance(self, instance, zone):
    """Start |instance| in |zone| and wait for the operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().start(
        project=self._project, zone=zone, instance=instance)
    pending = self.Execute(request)
    wait_args = {
        "operation": pending,
        "operation_scope": OperationScope.ZONE,
        "scope_name": zone,
    }
    try:
        self.WaitOnOperation(**wait_args)
    except errors.GceOperationTimeoutError:
        logger.error("Start instance failed: %s", instance)
        raise
    logger.info("Instance %s has been started.", instance)
692
def StartInstances(self, instances, zone):
    """Start |instances| in |zone| as one batch.

    Args:
        instances: A list of strings, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        The result of self._BatchExecuteOnInstances, a tuple
        (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
          have been executed.
        failed: A list of string, representing the names of instances that
          we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    start_api = self.service.instances().start
    # Everything but the "instance" argument is fixed up front.
    bound_start = functools.partial(
        start_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, bound_start)
711
def StopInstance(self, instance, zone):
    """Stop |instance| in |zone| and wait for the operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    request = self.service.instances().stop(
        project=self._project, zone=zone, instance=instance)
    pending = self.Execute(request)
    wait_args = {
        "operation": pending,
        "operation_scope": OperationScope.ZONE,
        "scope_name": zone,
    }
    try:
        self.WaitOnOperation(**wait_args)
    except errors.GceOperationTimeoutError:
        logger.error("Stop instance failed: %s", instance)
        raise
    logger.info("Instance %s has been terminated.", instance)
734
def StopInstances(self, instances, zone):
    """Stop |instances| in |zone| as one batch.

    Args:
        instances: A list of strings, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f"

    Returns:
        The result of self._BatchExecuteOnInstances, a tuple
        (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
          have been executed.
        failed: A list of string, representing the names of instances that
          we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    stop_api = self.service.instances().stop
    # Everything but the "instance" argument is fixed up front.
    bound_stop = functools.partial(
        stop_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, bound_stop)
753
def SetScheduling(self,
                  instance,
                  zone,
                  automatic_restart=True,
                  on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    Sends the setScheduling request for one instance and waits for the
    operation to finish.

    Args:
        instance: A string, representing instance name.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             "MIGRATE" or "TERMINATE" when there is
                             a maintenance event.

    Raises:
        errors.GceOperationTimeoutError: Operation takes too long to finish.
    """
    scheduling = {
        "automaticRestart": automatic_restart,
        "onHostMaintenance": on_host_maintenance,
    }
    request = self.service.instances().setScheduling(
        project=self._project, zone=zone, instance=instance, body=scheduling)
    pending = self.Execute(request)
    wait_args = {
        "operation": pending,
        "operation_scope": OperationScope.ZONE,
        "scope_name": zone,
    }
    try:
        self.WaitOnOperation(**wait_args)
    except errors.GceOperationTimeoutError:
        logger.error("Set instance scheduling failed: %s", instance)
        raise
    restart_text = str(automatic_restart).lower()
    logger.info(
        "Instance scheduling changed:\n"
        " automaticRestart: %s\n"
        " onHostMaintenance: %s\n",
        restart_text, on_host_maintenance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700796
def ListInstances(self, zone, instance_filter=None):
    """List the instances of a zone, following result pages.

    Args:
        zone: A string, representing zone name. e.g. "us-central1-f"
        instance_filter: A string representing a filter in format of
                         FIELD_NAME COMPARISON_STRING LITERAL_STRING
                         e.g. "name ne example-instance"
                         e.g. "name eq "example-instance-[0-9]+""

    Returns:
        A list of instances.
    """
    list_args = {
        "api_resource": self.service.instances().list,
        "project": self._project,
        "zone": zone,
        "filter": instance_filter,
    }
    return self.ListWithMultiPages(**list_args)
815
def SetSchedulingInstances(self,
                           instances,
                           zone,
                           automatic_restart=True,
                           on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    Batch counterpart of SetScheduling: the same request body is sent for
    every instance in |instances|.

    Args:
        instances: A list of string, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             migrated or terminated when there is
                             a maintenance event.

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
          have been executed.
        failed: A list of string, representing the names of instances that
          we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    body = {
        "automaticRestart": automatic_restart,
        # The key must be spelled "onHostMaintenance" (lower-case "o"), the
        # same spelling SetScheduling uses.
        "onHostMaintenance": on_host_maintenance
    }
    # Everything but the "instance" argument is fixed up front.
    action = functools.partial(
        self.service.instances().setScheduling,
        project=self._project,
        zone=zone,
        body=body)
    return self._BatchExecuteOnInstances(instances, zone, action)
855
856 def _BatchExecuteOnInstances(self, instances, zone, action):
857 """Batch processing operations requiring computing time.
858
859 Args:
860 instances: A list of instance names.
861 zone: A string, e.g. "us-central1-f".
862 action: partial func, all kwargs for this gcloud action has been
863 defined in the caller function (e.g. See "StartInstances")
864 except 'instance' which will be defined by iterating the
865 |instances|.
866
867 Returns:
868 A tuple, (done, failed, error_msgs)
869 done: A list of string, representing the names of instances that
870 have been executed.
871 failed: A list of string, representing the names of instances that
872 we failed to execute.
873 error_msgs: A list of string, representing the failure messages.
874 """
875 if not instances:
876 return [], [], []
877 # Batch send requests.
878 logger.info("Batch executing instances: %s", instances)
879 requests = {}
880 for instance_name in set(instances):
881 requests[instance_name] = action(instance=instance_name)
herbertxue308f7662018-05-18 03:25:58 +0000882 return self._BatchExecuteAndWait(
883 requests, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700884
885 def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
886 """Batch processing requests and wait on the operation.
887
888 Args:
Kevin Chengb5963882018-05-09 00:06:27 -0700889 requests: A dictionary. The key is a string representing the resource
890 name. For example, an instance name, or an image name.
891 operation_scope: A value from OperationScope, "zone", "region",
892 or "global".
893 scope_name: If operation_scope is "zone" or "region", this should be
894 the name of the zone or region, e.g. "us-central1-f".
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700895 Returns:
Kevin Chengb5963882018-05-09 00:06:27 -0700896 A tuple, (done, failed, error_msgs)
897 done: A list of string, representing the resource names that have
898 been executed.
899 failed: A list of string, representing resource names that
900 we failed to execute.
901 error_msgs: A list of string, representing the failure messages.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700902 """
903 results = self.BatchExecute(requests)
904 # Initialize return values
905 failed = []
906 error_msgs = []
907 for resource_name, (_, error) in results.iteritems():
908 if error is not None:
909 failed.append(resource_name)
910 error_msgs.append(str(error))
911 done = []
912 # Wait for the executing operations to finish.
913 logger.info("Waiting for executing operations")
914 for resource_name in requests.iterkeys():
915 operation, _ = results[resource_name]
916 if operation:
917 try:
918 self.WaitOnOperation(operation, operation_scope,
919 scope_name)
920 done.append(resource_name)
921 except errors.DriverError as exc:
922 failed.append(resource_name)
923 error_msgs.append(str(exc))
924 return done, failed, error_msgs
925
926 def ListZones(self):
927 """List all zone instances in the project.
928
929 Returns:
930 Gcompute response instance. For example:
931 {
932 "id": "projects/google.com%3Aandroid-build-staging/zones",
933 "kind": "compute#zoneList",
934 "selfLink": "https://www.googleapis.com/compute/v1/projects/"
935 "google.com:android-build-staging/zones"
936 "items": [
937 {
938 'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
939 'description': 'asia-east1-c',
940 'id': '2222',
941 'kind': 'compute#zone',
942 'name': 'asia-east1-c',
943 'region': 'https://www.googleapis.com/compute/v1/projects/'
944 'google.com:android-build-staging/regions/asia-east1',
945 'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
946 'google.com:android-build-staging/zones/asia-east1-c',
947 'status': 'UP'
948 }, {
949 'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
950 'description': 'asia-east1-b',
951 'id': '2221',
952 'kind': 'compute#zone',
953 'name': 'asia-east1-b',
954 'region': 'https://www.googleapis.com/compute/v1/projects/'
955 'google.com:android-build-staging/regions/asia-east1',
956 'selfLink': 'https://www.googleapis.com/compute/v1/projects'
957 '/google.com:android-build-staging/zones/asia-east1-b',
958 'status': 'UP'
959 }]
960 }
961 See cloud cluster's api/mixer_zones.proto
962 """
963 api = self.service.zones().list(project=self._project)
964 return self.Execute(api)
965
Kevin Chengb5963882018-05-09 00:06:27 -0700966 def ListRegions(self):
967 """List all the regions for a project.
968
969 Returns:
970 A dictionary containing all the zones and additional data. See this link
971 for the detailed response:
972 https://cloud.google.com/compute/docs/reference/latest/regions/list.
973 Example:
974 {
975 'items': [{
976 'name':
977 'us-central1',
978 'quotas': [{
979 'usage': 2.0,
980 'limit': 24.0,
981 'metric': 'CPUS'
982 }, {
983 'usage': 1.0,
984 'limit': 23.0,
985 'metric': 'IN_USE_ADDRESSES'
986 }, {
987 'usage': 209.0,
988 'limit': 10240.0,
989 'metric': 'DISKS_TOTAL_GB'
990 }, {
991 'usage': 1000.0,
992 'limit': 20000.0,
993 'metric': 'INSTANCES'
994 }]
995 },..]
996 }
997 """
998 api = self.service.regions().list(project=self._project)
999 return self.Execute(api)
1000
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001001 def _GetNetworkArgs(self, network):
1002 """Helper to generate network args that is used to create an instance.
1003
1004 Args:
1005 network: A string, e.g. "default".
1006
1007 Returns:
1008 A dictionary representing network args.
1009 """
1010 return {
1011 "network": self.GetNetworkUrl(network),
herbertxue308f7662018-05-18 03:25:58 +00001012 "accessConfigs": [{
1013 "name": "External NAT",
1014 "type": "ONE_TO_ONE_NAT"
1015 }]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001016 }
1017
herbertxue308f7662018-05-18 03:25:58 +00001018 def _GetDiskArgs(self,
1019 disk_name,
1020 image_name,
1021 image_project=None,
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001022 disk_size_gb=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001023 """Helper to generate disk args that is used to create an instance.
1024
1025 Args:
1026 disk_name: A string
1027 image_name: A string
Kevin Chengb5963882018-05-09 00:06:27 -07001028 image_project: A string
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001029 disk_size_gb: An integer
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001030
1031 Returns:
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001032 List holding dict of disk args.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001033 """
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001034 args = copy.deepcopy(BASE_DISK_ARGS)
1035 args["initializeParams"] = {
1036 "diskName": disk_name,
herbertxue308f7662018-05-18 03:25:58 +00001037 "sourceImage": self.GetImage(image_name,
1038 image_project)["selfLink"],
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001039 }
1040 # TODO: Remove this check once it's validated that we can either pass in
1041 # a None diskSizeGb or we find an appropriate default val.
1042 if disk_size_gb:
1043 args["diskSizeGb"] = disk_size_gb
1044 return [args]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001045
herbertxue308f7662018-05-18 03:25:58 +00001046 def _GetExtraDiskArgs(self, extra_disk_name, zone):
1047 """Get extra disk arg for given disk.
1048
1049 Args:
1050 extra_disk_name: String, name of the disk.
1051 zone: String, representing zone name, e.g. "us-central1-f"
1052
1053 Returns:
1054 A dictionary of disk args.
1055 """
1056 return [{
1057 "type": "PERSISTENT",
1058 "mode": "READ_WRITE",
1059 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1060 extra_disk_name),
1061 "autoDelete": True,
1062 "boot": False,
1063 "interface": "SCSI",
1064 "deviceName": extra_disk_name,
1065 }]
1066
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001067 # pylint: disable=too-many-locals
herbertxue308f7662018-05-18 03:25:58 +00001068 def CreateInstance(self,
1069 instance,
1070 image_name,
1071 machine_type,
1072 metadata,
1073 network,
1074 zone,
1075 disk_args=None,
1076 image_project=None,
1077 gpu=None,
1078 extra_disk_name=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001079 """Create a gce instance with a gce image.
1080
1081 Args:
herbertxue308f7662018-05-18 03:25:58 +00001082 instance: String, instance name.
1083 image_name: String, source image used to create this disk.
1084 machine_type: String, representing machine_type,
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001085 e.g. "n1-standard-1"
herbertxue308f7662018-05-18 03:25:58 +00001086 metadata: Dict, maps a metadata name to its value.
1087 network: String, representing network name, e.g. "default"
1088 zone: String, representing zone name, e.g. "us-central1-f"
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001089 disk_args: A list of extra disk args (strings), see _GetDiskArgs
1090 for example, if None, will create a disk using the given
1091 image.
herbertxue308f7662018-05-18 03:25:58 +00001092 image_project: String, name of the project where the image
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001093 belongs. Assume the default project if None.
herbertxue308f7662018-05-18 03:25:58 +00001094 gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001095 None no gpus will be attached. For more details see:
Kevin Chengb5963882018-05-09 00:06:27 -07001096 https://cloud.google.com/compute/docs/gpus/add-gpus
herbertxue308f7662018-05-18 03:25:58 +00001097 extra_disk_name: String,the name of the extra disk to attach.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001098 """
herbertxue308f7662018-05-18 03:25:58 +00001099 disk_args = (disk_args
1100 or self._GetDiskArgs(instance, image_name, image_project))
1101 if extra_disk_name:
1102 disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001103 body = {
1104 "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
1105 "name": instance,
1106 "networkInterfaces": [self._GetNetworkArgs(network)],
1107 "disks": disk_args,
herbertxue308f7662018-05-18 03:25:58 +00001108 "serviceAccounts": [{
1109 "email": "default",
1110 "scopes": self.DEFAULT_INSTANCE_SCOPE
1111 }],
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001112 }
1113
Kevin Chengb5963882018-05-09 00:06:27 -07001114 if gpu:
1115 body["guestAccelerators"] = [{
1116 "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
1117 "acceleratorCount": 1
1118 }]
1119 # Instances with GPUs cannot live migrate because they are assigned
1120 # to specific hardware devices.
1121 body["scheduling"] = {"onHostMaintenance": "terminate"}
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001122 if metadata:
herbertxue308f7662018-05-18 03:25:58 +00001123 metadata_list = [{
1124 "key": key,
1125 "value": val
1126 } for key, val in metadata.iteritems()]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001127 body["metadata"] = {"items": metadata_list}
1128 logger.info("Creating instance: project %s, zone %s, body:%s",
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001129 self._project, zone, body)
herbertxue308f7662018-05-18 03:25:58 +00001130 api = self.service.instances().insert(
1131 project=self._project, zone=zone, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001132 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001133 self.WaitOnOperation(
1134 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001135 logger.info("Instance %s has been created.", instance)
1136
1137 def DeleteInstance(self, instance, zone):
1138 """Delete a gce instance.
1139
1140 Args:
Kevin Chengb5963882018-05-09 00:06:27 -07001141 instance: A string, instance name.
1142 zone: A string, e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001143 """
1144 logger.info("Deleting instance: %s", instance)
herbertxue308f7662018-05-18 03:25:58 +00001145 api = self.service.instances().delete(
1146 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001147 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001148 self.WaitOnOperation(
1149 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001150 logger.info("Deleted instance: %s", instance)
1151
1152 def DeleteInstances(self, instances, zone):
1153 """Delete multiple instances.
1154
1155 Args:
1156 instances: A list of instance names.
1157 zone: A string, e.g. "us-central1-f".
1158
1159 Returns:
1160 A tuple, (deleted, failed, error_msgs)
1161 deleted: A list of names of instances that have been deleted.
1162 failed: A list of names of instances that we fail to delete.
1163 error_msgs: A list of failure messages.
1164 """
herbertxue308f7662018-05-18 03:25:58 +00001165 action = functools.partial(
1166 self.service.instances().delete, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001167 return self._BatchExecuteOnInstances(instances, zone, action)
1168
1169 def ResetInstance(self, instance, zone):
1170 """Reset the gce instance.
1171
1172 Args:
1173 instance: A string, instance name.
1174 zone: A string, e.g. "us-central1-f".
1175 """
1176 logger.info("Resetting instance: %s", instance)
herbertxue308f7662018-05-18 03:25:58 +00001177 api = self.service.instances().reset(
1178 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001179 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +00001180 self.WaitOnOperation(
1181 operation, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001182 logger.info("Instance has been reset: %s", instance)
1183
1184 def GetMachineType(self, machine_type, zone):
1185 """Get URL for a given machine typle.
1186
1187 Args:
1188 machine_type: A string, name of the machine type.
1189 zone: A string, e.g. "us-central1-f"
1190
1191 Returns:
1192 A machine type resource in json.
1193 https://cloud.google.com/compute/docs/reference/latest/
1194 machineTypes#resource
1195 """
herbertxue308f7662018-05-18 03:25:58 +00001196 api = self.service.machineTypes().get(
1197 project=self._project, zone=zone, machineType=machine_type)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001198 return self.Execute(api)
1199
Kevin Chengb5963882018-05-09 00:06:27 -07001200 def GetAcceleratorUrl(self, accelerator_type, zone):
1201 """Get URL for a given type of accelator.
1202
1203 Args:
1204 accelerator_type: A string, representing the accelerator, e.g
1205 "nvidia-tesla-k80"
1206 zone: A string representing a zone, e.g. "us-west1-b"
1207
1208 Returns:
1209 A URL that points to the accelerator resource, e.g.
1210 https://www.googleapis.com/compute/v1/projects/<project id>/zones/
1211 us-west1-b/acceleratorTypes/nvidia-tesla-k80
1212 """
herbertxue308f7662018-05-18 03:25:58 +00001213 api = self.service.acceleratorTypes().get(
1214 project=self._project, zone=zone, acceleratorType=accelerator_type)
Kevin Chengb5963882018-05-09 00:06:27 -07001215 result = self.Execute(api)
1216 return result["selfLink"]
1217
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001218 def GetNetworkUrl(self, network):
1219 """Get URL for a given network.
1220
1221 Args:
1222 network: A string, representing network name, e.g "default"
1223
1224 Returns:
1225 A URL that points to the network resource, e.g.
1226 https://www.googleapis.com/compute/v1/projects/<project id>/
1227 global/networks/default
1228 """
herbertxue308f7662018-05-18 03:25:58 +00001229 api = self.service.networks().get(
1230 project=self._project, network=network)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001231 result = self.Execute(api)
1232 return result["selfLink"]
1233
1234 def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
1235 """Compare the size of two machine types.
1236
1237 Args:
1238 machine_type_1: A string representing a machine type, e.g. n1-standard-1
1239 machine_type_2: A string representing a machine type, e.g. n1-standard-1
1240 zone: A string representing a zone, e.g. "us-central1-f"
1241
1242 Returns:
Kevin Cheng4ae42772018-10-02 11:39:48 -07001243 -1 if any metric of machine size of the first type is smaller than
1244 the second type.
1245 0 if all metrics of machine size are equal.
1246 1 if at least one metric of machine size of the first type is
1247 greater than the second type and all metrics of first type are
1248 greater or equal to the second type.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001249
1250 Raises:
1251 errors.DriverError: For malformed response.
1252 """
1253 machine_info_1 = self.GetMachineType(machine_type_1, zone)
1254 machine_info_2 = self.GetMachineType(machine_type_2, zone)
Kevin Cheng4ae42772018-10-02 11:39:48 -07001255 result = 0
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001256 for metric in self.MACHINE_SIZE_METRICS:
1257 if metric not in machine_info_1 or metric not in machine_info_2:
1258 raise errors.DriverError(
1259 "Malformed machine size record: Can't find '%s' in %s or %s"
1260 % (metric, machine_info_1, machine_info_2))
Kevin Cheng4ae42772018-10-02 11:39:48 -07001261 cmp_result = machine_info_1[metric] - machine_info_2[metric]
1262 if cmp_result < 0:
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001263 return -1
Kevin Cheng4ae42772018-10-02 11:39:48 -07001264 elif cmp_result > 0:
1265 result = 1
1266 return result
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001267
1268 def GetSerialPortOutput(self, instance, zone, port=1):
1269 """Get serial port output.
1270
1271 Args:
1272 instance: string, instance name.
1273 zone: string, zone name.
1274 port: int, which COM port to read from, 1-4, default to 1.
1275
1276 Returns:
1277 String, contents of the output.
1278
1279 Raises:
1280 errors.DriverError: For malformed response.
1281 """
1282 api = self.service.instances().getSerialPortOutput(
herbertxue308f7662018-05-18 03:25:58 +00001283 project=self._project, zone=zone, instance=instance, port=port)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001284 result = self.Execute(api)
1285 if "contents" not in result:
1286 raise errors.DriverError(
1287 "Malformed response for GetSerialPortOutput: %s" % result)
1288 return result["contents"]
1289
1290 def GetInstanceNamesByIPs(self, ips, zone):
1291 """Get Instance names by IPs.
1292
1293 This function will go through all instances, which
1294 could be slow if there are too many instances. However, currently
1295 GCE doesn't support search for instance by IP.
1296
1297 Args:
1298 ips: A set of IPs.
1299 zone: String, name of the zone.
1300
1301 Returns:
1302 A dictionary where key is IP and value is instance name or None
1303 if instance is not found for the given IP.
1304 """
1305 ip_name_map = dict.fromkeys(ips)
1306 for instance in self.ListInstances(zone):
1307 try:
1308 ip = instance["networkInterfaces"][0]["accessConfigs"][0][
1309 "natIP"]
1310 if ip in ips:
1311 ip_name_map[ip] = instance["name"]
1312 except (IndexError, KeyError) as e:
1313 logger.error("Could not get instance names by ips: %s", str(e))
1314 return ip_name_map
1315
1316 def GetInstanceIP(self, instance, zone):
1317 """Get Instance IP given instance name.
1318
1319 Args:
Kevin Chengb5963882018-05-09 00:06:27 -07001320 instance: String, representing instance name.
1321 zone: String, name of the zone.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001322
1323 Returns:
Kevin Cheng86d43c72018-08-30 10:59:14 -07001324 NamedTuple of (internal, external) IP of the instance.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001325 """
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001326 instance = self.GetInstance(instance, zone)
Kevin Cheng86d43c72018-08-30 10:59:14 -07001327 internal_ip = instance["networkInterfaces"][0]["networkIP"]
1328 external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"]
1329 return IP(internal=internal_ip, external=external_ip)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001330
1331 def SetCommonInstanceMetadata(self, body):
1332 """Set project-wide metadata.
1333
1334 Args:
Kevin Chengb5963882018-05-09 00:06:27 -07001335 body: Metadata body.
1336 metdata is in the following format.
1337 {
1338 "kind": "compute#metadata",
1339 "fingerprint": "a-23icsyx4E=",
1340 "items": [
1341 {
1342 "key": "google-compute-default-region",
1343 "value": "us-central1"
1344 }, ...
1345 ]
1346 }
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001347 """
1348 api = self.service.projects().setCommonInstanceMetadata(
1349 project=self._project, body=body)
1350 operation = self.Execute(api)
1351 self.WaitOnOperation(operation, operation_scope=OperationScope.GLOBAL)
1352
1353 def AddSshRsa(self, user, ssh_rsa_path):
1354 """Add the public rsa key to the project's metadata.
1355
1356 Compute engine instances that are created after will
1357 by default contain the key.
1358
1359 Args:
1360 user: the name of the user which the key belongs to.
1361 ssh_rsa_path: The absolute path to public rsa key.
1362 """
1363 if not os.path.exists(ssh_rsa_path):
herbertxue308f7662018-05-18 03:25:58 +00001364 raise errors.DriverError(
1365 "RSA file %s does not exist." % ssh_rsa_path)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001366
1367 logger.info("Adding ssh rsa key from %s to project %s for user: %s",
1368 ssh_rsa_path, self._project, user)
1369 project = self.GetProject()
1370 with open(ssh_rsa_path) as f:
1371 rsa = f.read()
1372 rsa = rsa.strip() if rsa else rsa
1373 utils.VerifyRsaPubKey(rsa)
1374 metadata = project["commonInstanceMetadata"]
1375 for item in metadata.setdefault("items", []):
1376 if item["key"] == "sshKeys":
1377 sshkey_item = item
1378 break
1379 else:
1380 sshkey_item = {"key": "sshKeys", "value": ""}
1381 metadata["items"].append(sshkey_item)
1382
1383 entry = "%s:%s" % (user, rsa)
1384 logger.debug("New RSA entry: %s", entry)
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001385 sshkey_item["value"] = "\n".join([sshkey_item["value"].strip(),
1386 entry]).strip()
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001387 self.SetCommonInstanceMetadata(metadata)
Fang Dengcef4b112017-03-02 11:20:17 -08001388
1389 def CheckAccess(self):
1390 """Check if the user has read access to the cloud project.
1391
1392 Returns:
1393 True if the user has at least read access to the project.
1394 False otherwise.
1395
1396 Raises:
1397 errors.HttpError if other unexpected error happens when
1398 accessing the project.
1399 """
1400 api = self.service.zones().list(project=self._project)
1401 retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
1402 retry_http_codes.remove(self.ACCESS_DENIED_CODE)
1403 try:
1404 self.Execute(api, retry_http_codes=retry_http_codes)
1405 except errors.HttpError as e:
1406 if e.code == self.ACCESS_DENIED_CODE:
1407 return False
1408 raise
1409 return True