blob: 6e8a9b0010b11e9e21e6c8eb1c99d36c4786d342 [file] [log] [blame]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070016"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
Kevin Cheng5c124ec2018-05-16 13:28:51 -070028# pylint: disable=too-many-lines
Fang Dengcef4b112017-03-02 11:20:17 -080029import copy
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070030import functools
31import logging
32import os
33
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070034from acloud.internal.lib import base_cloud_client
35from acloud.internal.lib import utils
36from acloud.public import errors
37
logger = logging.getLogger(__name__)

# Passed to utils.RetryOnException (together with _IsFingerPrintError) as the
# retry limit for calls that hit a fingerprint conflict (HTTP 412).
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10

# Baseline description of a read-write, auto-deleted, persistent boot disk.
# "initializeParams" is left empty here; presumably callers copy this dict and
# fill it in before use -- it is not referenced in this part of the file.
BASE_DISK_ARGS = {
    "type": "PERSISTENT",
    "boot": True,
    "mode": "READ_WRITE",
    "autoDelete": True,
    "initializeParams": {},
}
49
Keun Soo Yimb293fdb2016-09-21 16:03:44 -070050
class OperationScope(object):
    """Names the scopes a GCE operation can be looked up in.

    These values are compared against the operation_scope argument of
    ComputeClient methods to pick the matching operations API.
    """
    GLOBAL = "global"
    REGION = "region"
    ZONE = "zone"
56
57
class PersistentDiskType(object):
    """Names the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    SSD = "pd-ssd"
    STANDARD = "pd-standard"
66
67
class ImageStatus(object):
    """Names the status values an image can be in."""
    FAILED = "FAILED"
    PENDING = "PENDING"
    READY = "READY"
73
74
def _IsFingerPrintError(exc):
    """Tell whether an exception is an HTTP 412 "Precondition Failed" error.

    Args:
        exc: Exception instance.

    Returns:
        Boolean. True only if exc is an errors.HttpError whose code is 412.
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
85
86
Kevin Cheng5c124ec2018-05-16 13:28:51 -070087# pylint: disable=too-many-public-methods
class ComputeClient(base_cloud_client.BaseCloudApiClient):
    """Client that manages GCE.

    Wraps Compute Engine API calls (disks, images, instances, operations)
    for the single project taken from the acloud config at construction.
    """

    # API settings, used by BaseCloudApiClient.
    API_NAME = "compute"
    API_VERSION = "v1"
    # OAuth scopes for this client, joined into one space-separated string.
    SCOPE = " ".join([
        "https://www.googleapis.com/auth/compute",
        "https://www.googleapis.com/auth/devstorage.read_write"
    ])
    # Default settings for gce operations
    DEFAULT_INSTANCE_SCOPE = [
        "https://www.googleapis.com/auth/devstorage.read_only",
        "https://www.googleapis.com/auth/logging.write"
    ]
    # Upper bound and polling period used by WaitOnOperation.
    OPERATION_TIMEOUT_SECS = 30 * 60  # 30 mins
    OPERATION_POLL_INTERVAL_SECS = 20
    # Not referenced in this part of the file; presumably the machine type
    # fields compared when sizing machines -- confirm against the users.
    MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
    # HTTP status code for "forbidden"; not referenced in this part of the
    # file.
    ACCESS_DENIED_CODE = 403
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700107
108 def __init__(self, acloud_config, oauth2_credentials):
109 """Initialize.
110
111 Args:
112 acloud_config: An AcloudConfig object.
113 oauth2_credentials: An oauth2client.OAuth2Credentials instance.
114 """
115 super(ComputeClient, self).__init__(oauth2_credentials)
116 self._project = acloud_config.project
117
118 def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
119 """Get status of an operation.
120
121 Args:
122 operation: An Operation resource in the format of json.
123 operation_scope: A value from OperationScope, "zone", "region",
124 or "global".
125 scope_name: If operation_scope is "zone" or "region", this should be
126 the name of the zone or region, e.g. "us-central1-f".
127
128 Returns:
129 Status of the operation, one of "DONE", "PENDING", "RUNNING".
130
131 Raises:
132 errors.DriverError: if the operation fails.
133 """
134 operation_name = operation["name"]
135 if operation_scope == OperationScope.GLOBAL:
herbertxue308f7662018-05-18 03:25:58 +0000136 api = self.service.globalOperations().get(
137 project=self._project, operation=operation_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700138 result = self.Execute(api)
139 elif operation_scope == OperationScope.ZONE:
herbertxue308f7662018-05-18 03:25:58 +0000140 api = self.service.zoneOperations().get(
141 project=self._project,
142 operation=operation_name,
143 zone=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700144 result = self.Execute(api)
145 elif operation_scope == OperationScope.REGION:
herbertxue308f7662018-05-18 03:25:58 +0000146 api = self.service.regionOperations().get(
147 project=self._project,
148 operation=operation_name,
149 region=scope_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700150 result = self.Execute(api)
151
152 if result.get("error"):
153 errors_list = result["error"]["errors"]
herbertxue308f7662018-05-18 03:25:58 +0000154 raise errors.DriverError(
155 "Get operation state failed, errors: %s" % str(errors_list))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700156 return result["status"]
157
158 def WaitOnOperation(self, operation, operation_scope, scope_name=None):
159 """Wait for an operation to finish.
160
161 Args:
162 operation: An Operation resource in the format of json.
163 operation_scope: A value from OperationScope, "zone", "region",
164 or "global".
165 scope_name: If operation_scope is "zone" or "region", this should be
166 the name of the zone or region, e.g. "us-central1-f".
167 """
168 timeout_exception = errors.GceOperationTimeoutError(
169 "Operation hits timeout, did not complete within %d secs." %
170 self.OPERATION_TIMEOUT_SECS)
171 utils.PollAndWait(
172 func=self._GetOperationStatus,
173 expected_return="DONE",
174 timeout_exception=timeout_exception,
175 timeout_secs=self.OPERATION_TIMEOUT_SECS,
176 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
177 operation=operation,
178 operation_scope=operation_scope,
179 scope_name=scope_name)
180
181 def GetProject(self):
182 """Get project information.
183
184 Returns:
185 A project resource in json.
186 """
187 api = self.service.projects().get(project=self._project)
188 return self.Execute(api)
189
190 def GetDisk(self, disk_name, zone):
191 """Get disk information.
192
193 Args:
194 disk_name: A string.
195 zone: String, name of zone.
196
197 Returns:
198 An disk resource in json.
199 https://cloud.google.com/compute/docs/reference/latest/disks#resource
200 """
herbertxue308f7662018-05-18 03:25:58 +0000201 api = self.service.disks().get(
202 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700203 return self.Execute(api)
204
205 def CheckDiskExists(self, disk_name, zone):
206 """Check if disk exists.
207
208 Args:
209 disk_name: A string
210 zone: String, name of zone.
211
212 Returns:
213 True if disk exists, otherwise False.
214 """
215 try:
216 self.GetDisk(disk_name, zone)
217 exists = True
218 except errors.ResourceNotFoundError:
219 exists = False
220 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
221 exists)
222 return exists
223
herbertxue308f7662018-05-18 03:25:58 +0000224 def CreateDisk(self,
225 disk_name,
226 source_image,
227 size_gb,
228 zone,
229 source_project=None,
230 disk_type=PersistentDiskType.STANDARD):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700231 """Create a gce disk.
232
233 Args:
herbertxue308f7662018-05-18 03:25:58 +0000234 disk_name: String
235 source_image: String, name of the image.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700236 size_gb: Integer, size in gb.
herbertxue308f7662018-05-18 03:25:58 +0000237 zone: String, name of the zone, e.g. us-central1-b.
Kevin Chengb5963882018-05-09 00:06:27 -0700238 source_project: String, required if the image is located in a different
239 project.
240 disk_type: String, a value from PersistentDiskType, STANDARD
241 for regular hard disk or SSD for solid state disk.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700242 """
Kevin Chengb5963882018-05-09 00:06:27 -0700243 source_project = source_project or self._project
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700244 source_image = "projects/%s/global/images/%s" % (
Kevin Chengb5963882018-05-09 00:06:27 -0700245 source_project, source_image) if source_image else None
246 logger.info("Creating disk %s, size_gb: %d, source_image: %s",
247 disk_name, size_gb, str(source_image))
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700248 body = {
249 "name": disk_name,
250 "sizeGb": size_gb,
herbertxue308f7662018-05-18 03:25:58 +0000251 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
252 disk_type),
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700253 }
herbertxue308f7662018-05-18 03:25:58 +0000254 api = self.service.disks().insert(
255 project=self._project,
256 sourceImage=source_image,
257 zone=zone,
258 body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700259 operation = self.Execute(api)
260 try:
herbertxue308f7662018-05-18 03:25:58 +0000261 self.WaitOnOperation(
262 operation=operation,
263 operation_scope=OperationScope.ZONE,
264 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700265 except errors.DriverError:
266 logger.error("Creating disk failed, cleaning up: %s", disk_name)
267 if self.CheckDiskExists(disk_name, zone):
268 self.DeleteDisk(disk_name, zone)
269 raise
270 logger.info("Disk %s has been created.", disk_name)
271
272 def DeleteDisk(self, disk_name, zone):
273 """Delete a gce disk.
274
275 Args:
276 disk_name: A string, name of disk.
277 zone: A string, name of zone.
278 """
279 logger.info("Deleting disk %s", disk_name)
herbertxue308f7662018-05-18 03:25:58 +0000280 api = self.service.disks().delete(
281 project=self._project, zone=zone, disk=disk_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700282 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000283 self.WaitOnOperation(
284 operation=operation,
285 operation_scope=OperationScope.ZONE,
286 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700287 logger.info("Deleted disk %s", disk_name)
288
289 def DeleteDisks(self, disk_names, zone):
290 """Delete multiple disks.
291
292 Args:
293 disk_names: A list of disk names.
294 zone: A string, name of zone.
295
296 Returns:
297 A tuple, (deleted, failed, error_msgs)
298 deleted: A list of names of disks that have been deleted.
299 failed: A list of names of disks that we fail to delete.
300 error_msgs: A list of failure messages.
301 """
302 if not disk_names:
303 logger.warn("Nothing to delete. Arg disk_names is not provided.")
304 return [], [], []
305 # Batch send deletion requests.
306 logger.info("Deleting disks: %s", disk_names)
307 delete_requests = {}
308 for disk_name in set(disk_names):
herbertxue308f7662018-05-18 03:25:58 +0000309 request = self.service.disks().delete(
310 project=self._project, disk=disk_name, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700311 delete_requests[disk_name] = request
herbertxue308f7662018-05-18 03:25:58 +0000312 return self._BatchExecuteAndWait(
313 delete_requests, OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700314
315 def ListDisks(self, zone, disk_filter=None):
316 """List disks.
317
318 Args:
319 zone: A string, representing zone name. e.g. "us-central1-f"
320 disk_filter: A string representing a filter in format of
321 FIELD_NAME COMPARISON_STRING LITERAL_STRING
322 e.g. "name ne example-instance"
323 e.g. "name eq "example-instance-[0-9]+""
324
325 Returns:
326 A list of disks.
327 """
herbertxue308f7662018-05-18 03:25:58 +0000328 return self.ListWithMultiPages(
329 api_resource=self.service.disks().list,
330 project=self._project,
331 zone=zone,
332 filter=disk_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700333
herbertxue308f7662018-05-18 03:25:58 +0000334 def CreateImage(self,
335 image_name,
336 source_uri=None,
337 source_disk=None,
Kevin Chengb5963882018-05-09 00:06:27 -0700338 labels=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700339 """Create a Gce image.
340
341 Args:
herbertxue308f7662018-05-18 03:25:58 +0000342 image_name: String, name of image
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700343 source_uri: Full Google Cloud Storage URL where the disk image is
Kevin Chengb5963882018-05-09 00:06:27 -0700344 stored. e.g. "https://storage.googleapis.com/my-bucket/
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700345 avd-system-2243663.tar.gz"
Kevin Chengb5963882018-05-09 00:06:27 -0700346 source_disk: String, this should be the disk's selfLink value
347 (including zone and project), rather than the disk_name
348 e.g. https://www.googleapis.com/compute/v1/projects/
349 google.com:android-builds-project/zones/
350 us-east1-d/disks/<disk_name>
351 labels: Dict, will be added to the image's labels.
352
Kevin Chengb5963882018-05-09 00:06:27 -0700353 Raises:
354 errors.DriverError: For malformed request or response.
355 errors.GceOperationTimeoutError: Operation takes too long to finish.
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700356 """
herbertxue308f7662018-05-18 03:25:58 +0000357 if self.CheckImageExists(image_name):
358 return
359 if (source_uri and source_disk) or (not source_uri
360 and not source_disk):
Kevin Chengb5963882018-05-09 00:06:27 -0700361 raise errors.DriverError(
362 "Creating image %s requires either source_uri %s or "
herbertxue308f7662018-05-18 03:25:58 +0000363 "source_disk %s but not both" % (image_name, source_uri,
364 source_disk))
Kevin Chengb5963882018-05-09 00:06:27 -0700365 elif source_uri:
herbertxue308f7662018-05-18 03:25:58 +0000366 logger.info("Creating image %s, source_uri %s", image_name,
367 source_uri)
Kevin Chengb5963882018-05-09 00:06:27 -0700368 body = {
369 "name": image_name,
370 "rawDisk": {
371 "source": source_uri,
372 },
373 }
374 else:
herbertxue308f7662018-05-18 03:25:58 +0000375 logger.info("Creating image %s, source_disk %s", image_name,
376 source_disk)
Kevin Chengb5963882018-05-09 00:06:27 -0700377 body = {
378 "name": image_name,
379 "sourceDisk": source_disk,
380 }
381 if labels is not None:
382 body["labels"] = labels
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700383 api = self.service.images().insert(project=self._project, body=body)
384 operation = self.Execute(api)
385 try:
herbertxue308f7662018-05-18 03:25:58 +0000386 self.WaitOnOperation(
387 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700388 except errors.DriverError:
389 logger.error("Creating image failed, cleaning up: %s", image_name)
390 if self.CheckImageExists(image_name):
391 self.DeleteImage(image_name)
392 raise
393 logger.info("Image %s has been created.", image_name)
394
Kevin Chengb5963882018-05-09 00:06:27 -0700395 @utils.RetryOnException(_IsFingerPrintError,
396 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
397 def SetImageLabels(self, image_name, new_labels):
398 """Update image's labels. Retry for finger print conflict.
399
400 Note: Decorator RetryOnException will retry the call for FingerPrint
401 conflict (HTTP error code 412). The fingerprint is used to detect
402 conflicts of GCE resource updates. The fingerprint is initially generated
403 by Compute Engine and changes after every request to modify or update
404 resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
405 updates).
406
407 Args:
408 image_name: A string, the image name.
409 new_labels: Dict, will be added to the image's labels.
410
411 Returns:
412 A GlobalOperation resouce.
413 https://cloud.google.com/compute/docs/reference/latest/globalOperations
414 """
415 image = self.GetImage(image_name)
416 labels = image.get("labels", {})
417 labels.update(new_labels)
418 body = {
419 "labels": labels,
420 "labelFingerprint": image["labelFingerprint"]
421 }
herbertxue308f7662018-05-18 03:25:58 +0000422 api = self.service.images().setLabels(
423 project=self._project, resource=image_name, body=body)
Kevin Chengb5963882018-05-09 00:06:27 -0700424 return self.Execute(api)
425
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700426 def CheckImageExists(self, image_name):
427 """Check if image exists.
428
429 Args:
430 image_name: A string
431
432 Returns:
433 True if image exists, otherwise False.
434 """
435 try:
436 self.GetImage(image_name)
437 exists = True
438 except errors.ResourceNotFoundError:
439 exists = False
440 logger.debug("CheckImageExists: image_name: %s, result: %s",
441 image_name, exists)
442 return exists
443
Kevin Chengb5963882018-05-09 00:06:27 -0700444 def GetImage(self, image_name, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700445 """Get image information.
446
447 Args:
448 image_name: A string
Kevin Chengb5963882018-05-09 00:06:27 -0700449 image_project: A string
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700450
451 Returns:
452 An image resource in json.
453 https://cloud.google.com/compute/docs/reference/latest/images#resource
454 """
herbertxue308f7662018-05-18 03:25:58 +0000455 api = self.service.images().get(
456 project=image_project or self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700457 return self.Execute(api)
458
459 def DeleteImage(self, image_name):
460 """Delete an image.
461
462 Args:
463 image_name: A string
464 """
465 logger.info("Deleting image %s", image_name)
herbertxue308f7662018-05-18 03:25:58 +0000466 api = self.service.images().delete(
467 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700468 operation = self.Execute(api)
herbertxue308f7662018-05-18 03:25:58 +0000469 self.WaitOnOperation(
470 operation=operation, operation_scope=OperationScope.GLOBAL)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700471 logger.info("Deleted image %s", image_name)
472
473 def DeleteImages(self, image_names):
474 """Delete multiple images.
475
476 Args:
477 image_names: A list of image names.
478
479 Returns:
480 A tuple, (deleted, failed, error_msgs)
481 deleted: A list of names of images that have been deleted.
482 failed: A list of names of images that we fail to delete.
483 error_msgs: A list of failure messages.
484 """
485 if not image_names:
486 return [], [], []
487 # Batch send deletion requests.
488 logger.info("Deleting images: %s", image_names)
489 delete_requests = {}
490 for image_name in set(image_names):
herbertxue308f7662018-05-18 03:25:58 +0000491 request = self.service.images().delete(
492 project=self._project, image=image_name)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700493 delete_requests[image_name] = request
494 return self._BatchExecuteAndWait(delete_requests,
495 OperationScope.GLOBAL)
496
Kevin Chengb5963882018-05-09 00:06:27 -0700497 def ListImages(self, image_filter=None, image_project=None):
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700498 """List images.
499
500 Args:
501 image_filter: A string representing a filter in format of
502 FIELD_NAME COMPARISON_STRING LITERAL_STRING
503 e.g. "name ne example-image"
504 e.g. "name eq "example-image-[0-9]+""
Kevin Chengb5963882018-05-09 00:06:27 -0700505 image_project: String. If not provided, will list images from the default
506 project. Otherwise, will list images from the given
507 project, which can be any arbitrary project where the
508 account has read access
509 (i.e. has the role "roles/compute.imageUser")
510
511 Read more about image sharing across project:
512 https://cloud.google.com/compute/docs/images/sharing-images-across-projects
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700513
514 Returns:
515 A list of images.
516 """
herbertxue308f7662018-05-18 03:25:58 +0000517 return self.ListWithMultiPages(
518 api_resource=self.service.images().list,
519 project=image_project or self._project,
520 filter=image_filter)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700521
522 def GetInstance(self, instance, zone):
523 """Get information about an instance.
524
525 Args:
526 instance: A string, representing instance name.
527 zone: A string, representing zone name. e.g. "us-central1-f"
528
529 Returns:
530 An instance resource in json.
531 https://cloud.google.com/compute/docs/reference/latest/instances#resource
532 """
herbertxue308f7662018-05-18 03:25:58 +0000533 api = self.service.instances().get(
534 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700535 return self.Execute(api)
536
Kevin Chengb5963882018-05-09 00:06:27 -0700537 def AttachAccelerator(self, instance, zone, accelerator_count,
538 accelerator_type):
539 """Attach a GPU accelerator to the instance.
540
541 Note: In order for this to succeed the following must hold:
542 - The machine schedule must be set to "terminate" i.e:
543 SetScheduling(self, instance, zone, on_host_maintenance="terminate")
544 must have been called.
545 - The machine is not starting or running. i.e.
546 StopInstance(self, instance) must have been called.
547
548 Args:
549 instance: A string, representing instance name.
550 zone: String, name of zone.
551 accelerator_count: The number accelerators to be attached to the instance.
552 a value of 0 will detach all accelerators.
553 accelerator_type: The type of accelerator to attach. e.g.
554 "nvidia-tesla-k80"
555 """
556 body = {
557 "guestAccelerators": [{
herbertxue308f7662018-05-18 03:25:58 +0000558 "acceleratorType":
559 self.GetAcceleratorUrl(accelerator_type, zone),
560 "acceleratorCount":
561 accelerator_count
Kevin Chengb5963882018-05-09 00:06:27 -0700562 }]
563 }
564 api = self.service.instances().setMachineResources(
565 project=self._project, zone=zone, instance=instance, body=body)
566 operation = self.Execute(api)
567 try:
568 self.WaitOnOperation(
569 operation=operation,
570 operation_scope=OperationScope.ZONE,
571 scope_name=zone)
572 except errors.GceOperationTimeoutError:
573 logger.error("Attach instance failed: %s", instance)
574 raise
herbertxue308f7662018-05-18 03:25:58 +0000575 logger.info("%d x %s have been attached to instance %s.",
576 accelerator_count, accelerator_type, instance)
Kevin Chengb5963882018-05-09 00:06:27 -0700577
578 def AttachDisk(self, instance, zone, **kwargs):
579 """Attach the external disk to the instance.
580
581 Args:
582 instance: A string, representing instance name.
583 zone: String, name of zone.
584 **kwargs: The attachDisk request body. See "https://cloud.google.com/
585 compute/docs/reference/latest/instances/attachDisk" for detail.
586 {
587 "kind": "compute#attachedDisk",
588 "type": string,
589 "mode": string,
590 "source": string,
591 "deviceName": string,
592 "index": integer,
593 "boot": boolean,
594 "initializeParams": {
595 "diskName": string,
596 "sourceImage": string,
597 "diskSizeGb": long,
598 "diskType": string,
599 "sourceImageEncryptionKey": {
600 "rawKey": string,
601 "sha256": string
602 }
603 },
604 "autoDelete": boolean,
605 "licenses": [
606 string
607 ],
608 "interface": string,
609 "diskEncryptionKey": {
610 "rawKey": string,
611 "sha256": string
612 }
613 }
614
615 Returns:
616 An disk resource in json.
617 https://cloud.google.com/compute/docs/reference/latest/disks#resource
618
619
620 Raises:
621 errors.GceOperationTimeoutError: Operation takes too long to finish.
622 """
623 api = self.service.instances().attachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000624 project=self._project, zone=zone, instance=instance, body=kwargs)
Kevin Chengb5963882018-05-09 00:06:27 -0700625 operation = self.Execute(api)
626 try:
627 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000628 operation=operation,
629 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700630 scope_name=zone)
631 except errors.GceOperationTimeoutError:
632 logger.error("Attach instance failed: %s", instance)
633 raise
634 logger.info("Disk has been attached to instance %s.", instance)
635
636 def DetachDisk(self, instance, zone, disk_name):
637 """Attach the external disk to the instance.
638
639 Args:
640 instance: A string, representing instance name.
641 zone: String, name of zone.
642 disk_name: A string, the name of the detach disk.
643
644 Returns:
645 A ZoneOperation resource.
646 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
647
648 Raises:
649 errors.GceOperationTimeoutError: Operation takes too long to finish.
650 """
651 api = self.service.instances().detachDisk(
herbertxue308f7662018-05-18 03:25:58 +0000652 project=self._project,
653 zone=zone,
654 instance=instance,
Kevin Chengb5963882018-05-09 00:06:27 -0700655 deviceName=disk_name)
656 operation = self.Execute(api)
657 try:
658 self.WaitOnOperation(
herbertxue308f7662018-05-18 03:25:58 +0000659 operation=operation,
660 operation_scope=OperationScope.ZONE,
Kevin Chengb5963882018-05-09 00:06:27 -0700661 scope_name=zone)
662 except errors.GceOperationTimeoutError:
663 logger.error("Detach instance failed: %s", instance)
664 raise
665 logger.info("Disk has been detached to instance %s.", instance)
666
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700667 def StartInstance(self, instance, zone):
668 """Start |instance| in |zone|.
669
670 Args:
671 instance: A string, representing instance name.
672 zone: A string, representing zone name. e.g. "us-central1-f"
673
674 Raises:
675 errors.GceOperationTimeoutError: Operation takes too long to finish.
676 """
herbertxue308f7662018-05-18 03:25:58 +0000677 api = self.service.instances().start(
678 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700679 operation = self.Execute(api)
680 try:
herbertxue308f7662018-05-18 03:25:58 +0000681 self.WaitOnOperation(
682 operation=operation,
683 operation_scope=OperationScope.ZONE,
684 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700685 except errors.GceOperationTimeoutError:
686 logger.error("Start instance failed: %s", instance)
687 raise
688 logger.info("Instance %s has been started.", instance)
689
690 def StartInstances(self, instances, zone):
691 """Start |instances| in |zone|.
692
693 Args:
694 instances: A list of strings, representing instance names's list.
695 zone: A string, representing zone name. e.g. "us-central1-f"
696
697 Returns:
698 A tuple, (done, failed, error_msgs)
699 done: A list of string, representing the names of instances that
700 have been executed.
701 failed: A list of string, representing the names of instances that
702 we failed to execute.
703 error_msgs: A list of string, representing the failure messages.
704 """
herbertxue308f7662018-05-18 03:25:58 +0000705 action = functools.partial(
706 self.service.instances().start, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700707 return self._BatchExecuteOnInstances(instances, zone, action)
708
709 def StopInstance(self, instance, zone):
710 """Stop |instance| in |zone|.
711
712 Args:
713 instance: A string, representing instance name.
714 zone: A string, representing zone name. e.g. "us-central1-f"
715
716 Raises:
717 errors.GceOperationTimeoutError: Operation takes too long to finish.
718 """
herbertxue308f7662018-05-18 03:25:58 +0000719 api = self.service.instances().stop(
720 project=self._project, zone=zone, instance=instance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700721 operation = self.Execute(api)
722 try:
herbertxue308f7662018-05-18 03:25:58 +0000723 self.WaitOnOperation(
724 operation=operation,
725 operation_scope=OperationScope.ZONE,
726 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700727 except errors.GceOperationTimeoutError:
728 logger.error("Stop instance failed: %s", instance)
729 raise
730 logger.info("Instance %s has been terminated.", instance)
731
732 def StopInstances(self, instances, zone):
733 """Stop |instances| in |zone|.
734
735 Args:
Kevin Chengb5963882018-05-09 00:06:27 -0700736 instances: A list of strings, representing instance names's list.
737 zone: A string, representing zone name. e.g. "us-central1-f"
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700738
739 Returns:
740 A tuple, (done, failed, error_msgs)
741 done: A list of string, representing the names of instances that
742 have been executed.
743 failed: A list of string, representing the names of instances that
744 we failed to execute.
745 error_msgs: A list of string, representing the failure messages.
746 """
herbertxue308f7662018-05-18 03:25:58 +0000747 action = functools.partial(
748 self.service.instances().stop, project=self._project, zone=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700749 return self._BatchExecuteOnInstances(instances, zone, action)
750
751 def SetScheduling(self,
752 instance,
753 zone,
754 automatic_restart=True,
755 on_host_maintenance="MIGRATE"):
756 """Update scheduling config |automatic_restart| and |on_host_maintenance|.
757
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700758 Args:
759 instance: A string, representing instance name.
760 zone: A string, representing zone name. e.g. "us-central1-f".
761 automatic_restart: Boolean, determine whether the instance will
762 automatically restart if it crashes or not,
763 default to True.
Kevin Chengb5963882018-05-09 00:06:27 -0700764 on_host_maintenance: enum["MIGRATE", "TERMINATE"]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700765 The instance's maintenance behavior, which
766 determines whether the instance is live
Kevin Chengb5963882018-05-09 00:06:27 -0700767 "MIGRATE" or "TERMINATE" when there is
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700768 a maintenance event.
769
770 Raises:
771 errors.GceOperationTimeoutError: Operation takes too long to finish.
772 """
herbertxue308f7662018-05-18 03:25:58 +0000773 body = {
774 "automaticRestart": automatic_restart,
775 "onHostMaintenance": on_host_maintenance
776 }
777 api = self.service.instances().setScheduling(
778 project=self._project, zone=zone, instance=instance, body=body)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700779 operation = self.Execute(api)
780 try:
herbertxue308f7662018-05-18 03:25:58 +0000781 self.WaitOnOperation(
782 operation=operation,
783 operation_scope=OperationScope.ZONE,
784 scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700785 except errors.GceOperationTimeoutError:
786 logger.error("Set instance scheduling failed: %s", instance)
787 raise
herbertxue308f7662018-05-18 03:25:58 +0000788 logger.info(
789 "Instance scheduling changed:\n"
790 " automaticRestart: %s\n"
791 " onHostMaintenance: %s\n",
792 str(automatic_restart).lower(), on_host_maintenance)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700793
794 def ListInstances(self, zone, instance_filter=None):
795 """List instances.
796
797 Args:
798 zone: A string, representing zone name. e.g. "us-central1-f"
799 instance_filter: A string representing a filter in format of
800 FIELD_NAME COMPARISON_STRING LITERAL_STRING
801 e.g. "name ne example-instance"
802 e.g. "name eq "example-instance-[0-9]+""
803
804 Returns:
805 A list of instances.
806 """
807 return self.ListWithMultiPages(
808 api_resource=self.service.instances().list,
809 project=self._project,
810 zone=zone,
811 filter=instance_filter)
812
def SetSchedulingInstances(self,
                           instances,
                           zone,
                           automatic_restart=True,
                           on_host_maintenance="MIGRATE"):
    """Update scheduling config |automatic_restart| and |on_host_maintenance|.

    The same scheduling body is applied to every instance in |instances|
    through one batched request.

    See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.

    Args:
        instances: A list of string, representing instance names.
        zone: A string, representing zone name. e.g. "us-central1-f".
        automatic_restart: Boolean, determine whether the instance will
                           automatically restart if it crashes or not,
                           default to True.
        on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                             The instance's maintenance behavior, which
                             determines whether the instance is live
                             migrated or terminated when there is
                             a maintenance event.

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the names of instances that
              have been executed.
        failed: A list of string, representing the names of instances that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    # The Scheduling resource field is spelled "onHostMaintenance" (lower
    # camel case), the same key the single-instance SetScheduling sends.
    body = {
        "automaticRestart": automatic_restart,
        "onHostMaintenance": on_host_maintenance
    }
    # 'instance' is intentionally left unbound; _BatchExecuteOnInstances
    # supplies it for each instance name.
    action = functools.partial(
        self.service.instances().setScheduling,
        project=self._project,
        zone=zone,
        body=body)
    return self._BatchExecuteOnInstances(instances, zone, action)
852
853 def _BatchExecuteOnInstances(self, instances, zone, action):
854 """Batch processing operations requiring computing time.
855
856 Args:
857 instances: A list of instance names.
858 zone: A string, e.g. "us-central1-f".
859 action: partial func, all kwargs for this gcloud action has been
860 defined in the caller function (e.g. See "StartInstances")
861 except 'instance' which will be defined by iterating the
862 |instances|.
863
864 Returns:
865 A tuple, (done, failed, error_msgs)
866 done: A list of string, representing the names of instances that
867 have been executed.
868 failed: A list of string, representing the names of instances that
869 we failed to execute.
870 error_msgs: A list of string, representing the failure messages.
871 """
872 if not instances:
873 return [], [], []
874 # Batch send requests.
875 logger.info("Batch executing instances: %s", instances)
876 requests = {}
877 for instance_name in set(instances):
878 requests[instance_name] = action(instance=instance_name)
herbertxue308f7662018-05-18 03:25:58 +0000879 return self._BatchExecuteAndWait(
880 requests, operation_scope=OperationScope.ZONE, scope_name=zone)
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700881
def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
    """Batch processing requests and wait on the operation.

    Args:
        requests: A dictionary. The key is a string representing the resource
                  name. For example, an instance name, or an image name.
        operation_scope: A value from OperationScope, "zone", "region",
                         or "global".
        scope_name: If operation_scope is "zone" or "region", this should be
                    the name of the zone or region, e.g. "us-central1-f".

    Returns:
        A tuple, (done, failed, error_msgs)
        done: A list of string, representing the resource names that have
              been executed.
        failed: A list of string, representing resource names that
                we failed to execute.
        error_msgs: A list of string, representing the failure messages.
    """
    results = self.BatchExecute(requests)
    failed = []
    error_msgs = []
    # First pass: collect requests that were rejected outright.
    # items() (rather than the Python 2-only iteritems()) keeps this
    # working on both Python 2 and Python 3.
    for resource_name, (_, error) in results.items():
        if error is not None:
            failed.append(resource_name)
            error_msgs.append(str(error))
    done = []
    # Second pass: wait for every operation that was actually started.
    logger.info("Waiting for executing operations")
    for resource_name in requests:
        operation, _ = results[resource_name]
        if not operation:
            continue
        try:
            self.WaitOnOperation(operation, operation_scope, scope_name)
        except errors.DriverError as exc:
            failed.append(resource_name)
            error_msgs.append(str(exc))
        else:
            done.append(resource_name)
    return done, failed, error_msgs
922
def ListZones(self):
    """Execute a zones.list request for the client's project.

    Returns:
        Gcompute response instance. For example:
        {
          "id": "projects/google.com%3Aandroid-build-staging/zones",
          "kind": "compute#zoneList",
          "selfLink": "https://www.googleapis.com/compute/v1/projects/"
              "google.com:android-build-staging/zones"
          "items": [
            {
              'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
              'description': 'asia-east1-c',
              'id': '2222',
              'kind': 'compute#zone',
              'name': 'asia-east1-c',
              'region': 'https://www.googleapis.com/compute/v1/projects/'
                  'google.com:android-build-staging/regions/asia-east1',
              'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
                  'google.com:android-build-staging/zones/asia-east1-c',
              'status': 'UP'
            }, ...]
        }
        See cloud cluster's api/mixer_zones.proto
    """
    zones_request = self.service.zones().list(project=self._project)
    zones_response = self.Execute(zones_request)
    return zones_response
962
def ListRegions(self):
    """Execute a regions.list request for the client's project.

    Returns:
        A dictionary containing all the regions and additional data. See
        this link for the detailed response:
        https://cloud.google.com/compute/docs/reference/latest/regions/list.
        Example:
        {
            'items': [{
                'name':
                    'us-central1',
                'quotas': [{
                    'usage': 2.0,
                    'limit': 24.0,
                    'metric': 'CPUS'
                }, {
                    'usage': 1.0,
                    'limit': 23.0,
                    'metric': 'IN_USE_ADDRESSES'
                }, {
                    'usage': 209.0,
                    'limit': 10240.0,
                    'metric': 'DISKS_TOTAL_GB'
                }, {
                    'usage': 1000.0,
                    'limit': 20000.0,
                    'metric': 'INSTANCES'
                }]
            },..]
        }
    """
    regions_request = self.service.regions().list(project=self._project)
    regions_response = self.Execute(regions_request)
    return regions_response
997
Keun Soo Yimb293fdb2016-09-21 16:03:44 -0700998 def _GetNetworkArgs(self, network):
999 """Helper to generate network args that is used to create an instance.
1000
1001 Args:
1002 network: A string, e.g. "default".
1003
1004 Returns:
1005 A dictionary representing network args.
1006 """
1007 return {
1008 "network": self.GetNetworkUrl(network),
herbertxue308f7662018-05-18 03:25:58 +00001009 "accessConfigs": [{
1010 "name": "External NAT",
1011 "type": "ONE_TO_ONE_NAT"
1012 }]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001013 }
1014
def _GetDiskArgs(self,
                 disk_name,
                 image_name,
                 image_project=None,
                 disk_size_gb=None):
    """Build the boot disk args used when creating an instance.

    Args:
        disk_name: A string
        image_name: A string
        image_project: A string
        disk_size_gb: An integer

    Returns:
        List holding a single dict of disk args, derived from a private
        copy of BASE_DISK_ARGS.
    """
    # Deep copy so the shared module-level template is never modified.
    boot_disk = copy.deepcopy(BASE_DISK_ARGS)
    source_image = self.GetImage(image_name, image_project)["selfLink"]
    boot_disk["initializeParams"] = {
        "diskName": disk_name,
        "sourceImage": source_image,
    }
    # TODO: Remove this check once it's validated that we can either pass in
    # a None diskSizeGb or we find an appropriate default val.
    if disk_size_gb:
        boot_disk["diskSizeGb"] = disk_size_gb
    return [boot_disk]
Keun Soo Yimb293fdb2016-09-21 16:03:44 -07001042
herbertxue308f7662018-05-18 03:25:58 +00001043 def _GetExtraDiskArgs(self, extra_disk_name, zone):
1044 """Get extra disk arg for given disk.
1045
1046 Args:
1047 extra_disk_name: String, name of the disk.
1048 zone: String, representing zone name, e.g. "us-central1-f"
1049
1050 Returns:
1051 A dictionary of disk args.
1052 """
1053 return [{
1054 "type": "PERSISTENT",
1055 "mode": "READ_WRITE",
1056 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1057 extra_disk_name),
1058 "autoDelete": True,
1059 "boot": False,
1060 "interface": "SCSI",
1061 "deviceName": extra_disk_name,
1062 }]
1063
Kevin Cheng5c124ec2018-05-16 13:28:51 -07001064 # pylint: disable=too-many-locals
def CreateInstance(self,
                   instance,
                   image_name,
                   machine_type,
                   metadata,
                   network,
                   zone,
                   disk_args=None,
                   image_project=None,
                   gpu=None,
                   extra_disk_name=None):
    """Create a gce instance with a gce image.

    Builds the instances.insert request body, executes it and waits for
    the resulting zone operation to finish.

    Args:
        instance: String, instance name.
        image_name: String, source image used to create this disk.
        machine_type: String, representing machine_type,
                      e.g. "n1-standard-1"
        metadata: Dict, maps a metadata name to its value.
        network: String, representing network name, e.g. "default"
        zone: String, representing zone name, e.g. "us-central1-f"
        disk_args: A list of extra disk args (strings), see _GetDiskArgs
                   for example, if None, will create a disk using the given
                   image. The list passed in is not modified.
        image_project: String, name of the project where the image
                       belongs. Assume the default project if None.
        gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
             None no gpus will be attached. For more details see:
             https://cloud.google.com/compute/docs/gpus/add-gpus
        extra_disk_name: String,the name of the extra disk to attach.
    """
    if disk_args:
        # Work on a copy: the extra disk appended below must not leak
        # into the caller's list.
        disk_args = list(disk_args)
    else:
        disk_args = self._GetDiskArgs(instance, image_name, image_project)
    if extra_disk_name:
        disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
    body = {
        "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
        "name": instance,
        "networkInterfaces": [self._GetNetworkArgs(network)],
        "disks": disk_args,
        "serviceAccounts": [{
            "email": "default",
            "scopes": self.DEFAULT_INSTANCE_SCOPE
        }],
    }

    if gpu:
        body["guestAccelerators"] = [{
            "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
            "acceleratorCount": 1
        }]
        # Instances with GPUs cannot live migrate because they are assigned
        # to specific hardware devices.
        body["scheduling"] = {"onHostMaintenance": "terminate"}
    if metadata:
        # items() (not the Python 2-only iteritems()) works on both
        # Python 2 and Python 3.
        metadata_list = [{
            "key": key,
            "value": val
        } for key, val in metadata.items()]
        body["metadata"] = {"items": metadata_list}
    logger.info("Creating instance: project %s, zone %s, body:%s",
                self._project, zone, body)
    api = self.service.instances().insert(
        project=self._project, zone=zone, body=body)
    operation = self.Execute(api)
    self.WaitOnOperation(
        operation, operation_scope=OperationScope.ZONE, scope_name=zone)
    logger.info("Instance %s has been created.", instance)
1133
def DeleteInstance(self, instance, zone):
    """Delete one gce instance and wait for the deletion to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f"
    """
    logger.info("Deleting instance: %s", instance)
    target = {
        "project": self._project,
        "zone": zone,
        "instance": instance,
    }
    delete_request = self.service.instances().delete(**target)
    delete_operation = self.Execute(delete_request)
    # Block until the zone-scoped delete operation completes.
    self.WaitOnOperation(
        delete_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Deleted instance: %s", instance)
1148
def DeleteInstances(self, instances, zone):
    """Delete several instances of one zone in a single batch.

    Args:
        instances: A list of instance names.
        zone: A string, e.g. "us-central1-f".

    Returns:
        A tuple, (deleted, failed, error_msgs)
        deleted: A list of names of instances that have been deleted.
        failed: A list of names of instances that we fail to delete.
        error_msgs: A list of failure messages.
    """
    delete_api = self.service.instances().delete
    # 'instance' stays unbound; the batch helper fills it in per name.
    bound_delete = functools.partial(
        delete_api, project=self._project, zone=zone)
    return self._BatchExecuteOnInstances(instances, zone, bound_delete)
1165
def ResetInstance(self, instance, zone):
    """Reset one gce instance and wait for the reset to finish.

    Args:
        instance: A string, instance name.
        zone: A string, e.g. "us-central1-f".
    """
    logger.info("Resetting instance: %s", instance)
    target = {
        "project": self._project,
        "zone": zone,
        "instance": instance,
    }
    reset_request = self.service.instances().reset(**target)
    reset_operation = self.Execute(reset_request)
    # Block until the zone-scoped reset operation completes.
    self.WaitOnOperation(
        reset_operation,
        operation_scope=OperationScope.ZONE,
        scope_name=zone)
    logger.info("Instance has been reset: %s", instance)
1180
def GetMachineType(self, machine_type, zone):
    """Look up the resource describing a given machine type.

    Args:
        machine_type: A string, name of the machine type.
        zone: A string, e.g. "us-central1-f"

    Returns:
        A machine type resource in json.
        https://cloud.google.com/compute/docs/reference/latest/
        machineTypes#resource
    """
    lookup = {
        "project": self._project,
        "zone": zone,
        "machineType": machine_type,
    }
    machine_type_request = self.service.machineTypes().get(**lookup)
    return self.Execute(machine_type_request)
1196
def GetAcceleratorUrl(self, accelerator_type, zone):
    """Resolve the URL of a given type of accelerator.

    Args:
        accelerator_type: A string, representing the accelerator, e.g
                          "nvidia-tesla-k80"
        zone: A string representing a zone, e.g. "us-west1-b"

    Returns:
        A URL that points to the accelerator resource (the "selfLink" of
        the response), e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/zones/
        us-west1-b/acceleratorTypes/nvidia-tesla-k80
    """
    lookup = {
        "project": self._project,
        "zone": zone,
        "acceleratorType": accelerator_type,
    }
    accelerator_request = self.service.acceleratorTypes().get(**lookup)
    accelerator = self.Execute(accelerator_request)
    return accelerator["selfLink"]
1214
def GetNetworkUrl(self, network):
    """Resolve the URL of a given network.

    Args:
        network: A string, representing network name, e.g "default"

    Returns:
        A URL that points to the network resource (the "selfLink" of the
        response), e.g.
        https://www.googleapis.com/compute/v1/projects/<project id>/
        global/networks/default
    """
    network_request = self.service.networks().get(
        project=self._project, network=network)
    network_resource = self.Execute(network_request)
    return network_resource["selfLink"]
1230
def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
    """Compare the size of two machine types.

    The metrics in self.MACHINE_SIZE_METRICS are checked in order; the
    first metric on which the two machine types differ decides the result.

    Args:
        machine_type_1: A string representing a machine type, e.g. n1-standard-1
        machine_type_2: A string representing a machine type, e.g. n1-standard-1
        zone: A string representing a zone, e.g. "us-central1-f"

    Returns:
        1 if size of the first type is greater than the second type.
        -1 if size of the first type is smaller than the second type.
        0 if they are equal.

    Raises:
        errors.DriverError: For malformed response.
    """
    machine_info_1 = self.GetMachineType(machine_type_1, zone)
    machine_info_2 = self.GetMachineType(machine_type_2, zone)
    for metric in self.MACHINE_SIZE_METRICS:
        if metric not in machine_info_1 or metric not in machine_info_2:
            raise errors.DriverError(
                "Malformed machine size record: Can't find '%s' in %s or %s"
                % (metric, machine_info_1, machine_info_2))
        size_1 = machine_info_1[metric]
        size_2 = machine_info_2[metric]
        if size_1 > size_2:
            return 1
        if size_1 < size_2:
            return -1
        # Equal on this metric: fall through to the next one.
    return 0
1259
def GetSerialPortOutput(self, instance, zone, port=1):
    """Read the serial port output of an instance.

    Args:
        instance: string, instance name.
        zone: string, zone name.
        port: int, which COM port to read from, 1-4, default to 1.

    Returns:
        String, contents of the output.

    Raises:
        errors.DriverError: For malformed response, i.e. a response
                            without a "contents" entry.
    """
    serial_request = self.service.instances().getSerialPortOutput(
        project=self._project, zone=zone, instance=instance, port=port)
    response = self.Execute(serial_request)
    if "contents" in response:
        return response["contents"]
    raise errors.DriverError(
        "Malformed response for GetSerialPortOutput: %s" % response)
1281
def GetInstanceNamesByIPs(self, ips, zone):
    """Map each of the given IPs to the name of the instance using it.

    Every instance of the zone is inspected, which could be slow if there
    are too many instances. However, currently GCE doesn't support search
    for instance by IP.

    Args:
        ips: A set of IPs.
        zone: String, name of the zone.

    Returns:
        A dictionary where key is IP and value is instance name or None
        if instance is not found for the given IP.
    """
    # Start with every requested IP unresolved (mapped to None).
    names_by_ip = dict.fromkeys(ips)
    for gce_instance in self.ListInstances(zone):
        try:
            first_interface = gce_instance["networkInterfaces"][0]
            first_access_config = first_interface["accessConfigs"][0]
            nat_ip = first_access_config["natIP"]
            if nat_ip in ips:
                names_by_ip[nat_ip] = gce_instance["name"]
        except (IndexError, KeyError) as e:
            # An instance lacking the expected fields is logged and skipped.
            logger.error("Could not get instance names by ips: %s", str(e))
    return names_by_ip
1307
def GetInstanceIP(self, instance, zone):
    """Return the NAT IP of the named instance.

    Args:
        instance: String, representing instance name.
        zone: String, name of the zone.

    Returns:
        string, IP of the instance, read from the first access config of
        its first network interface.
    """
    # TODO(fdeng): This is for accessing external IP.
    # We should handle internal IP as well when the script is running
    # on a GCE instance in the same network of |instance|.
    instance_resource = self.GetInstance(instance, zone)
    first_interface = instance_resource["networkInterfaces"][0]
    first_access_config = first_interface["accessConfigs"][0]
    return first_access_config["natIP"]
1323
def SetCommonInstanceMetadata(self, body):
    """Set project-wide metadata and wait for the change to complete.

    Args:
        body: Metadata body.
              metadata is in the following format.
              {
                "kind": "compute#metadata",
                "fingerprint": "a-23icsyx4E=",
                "items": [
                  {
                    "key": "google-compute-default-region",
                    "value": "us-central1"
                  }, ...
                ]
              }
    """
    metadata_request = self.service.projects().setCommonInstanceMetadata(
        project=self._project, body=body)
    metadata_operation = self.Execute(metadata_request)
    # Project metadata is not tied to a zone or region, so the operation
    # is waited on at global scope.
    self.WaitOnOperation(
        metadata_operation, operation_scope=OperationScope.GLOBAL)
1345
def AddSshRsa(self, user, ssh_rsa_path):
    """Append a user's public rsa key to the project's metadata.

    Compute engine instances that are created after will
    by default contain the key.

    Args:
        user: the name of the user which the key belongs to.
        ssh_rsa_path: The absolute path to public rsa key.

    Raises:
        errors.DriverError: If |ssh_rsa_path| does not exist.
    """
    if not os.path.exists(ssh_rsa_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % ssh_rsa_path)

    logger.info("Adding ssh rsa key from %s to project %s for user: %s",
                ssh_rsa_path, self._project, user)
    project = self.GetProject()
    with open(ssh_rsa_path) as key_file:
        public_key = key_file.read()
    if public_key:
        public_key = public_key.strip()
    utils.VerifyRsaPubKey(public_key)

    metadata = project["commonInstanceMetadata"]
    items = metadata.setdefault("items", [])
    # Reuse the existing "sshKeys" entry if there is one, otherwise
    # register an empty one in the metadata items.
    sshkey_item = next(
        (item for item in items if item["key"] == "sshKeys"), None)
    if sshkey_item is None:
        sshkey_item = {"key": "sshKeys", "value": ""}
        items.append(sshkey_item)

    new_entry = "%s:%s" % (user, public_key)
    logger.debug("New RSA entry: %s", new_entry)
    # Keys are newline separated; the outer strip() removes the leading
    # newline left when there were no keys before.
    existing_keys = sshkey_item["value"].strip()
    sshkey_item["value"] = "\n".join([existing_keys, new_entry]).strip()
    self.SetCommonInstanceMetadata(metadata)
Fang Dengcef4b112017-03-02 11:20:17 -08001381
def CheckAccess(self):
    """Probe whether the user can read the cloud project.

    The probe is a zones.list call; an access-denied response is treated
    as the answer rather than retried.

    Returns:
        True if the user has at least read access to the project.
        False otherwise.

    Raises:
        errors.HttpError if other unexpected error happens when
        accessing the project.
    """
    probe_request = self.service.zones().list(project=self._project)
    # Copy before removing so the shared RETRY_HTTP_CODES is left intact.
    codes_to_retry = copy.copy(self.RETRY_HTTP_CODES)
    codes_to_retry.remove(self.ACCESS_DENIED_CODE)
    try:
        self.Execute(probe_request, retry_http_codes=codes_to_retry)
    except errors.HttpError as e:
        if e.code != self.ACCESS_DENIED_CODE:
            raise
        return False
    else:
        return True