# blob: 834c7a61e8d3166a17fc625f70941b1523ad3c4d
import commands
import errno
import hashlib
import logging
import md5
import os
import pty
import random
import re
import select
import signal
import socket
import string
import subprocess
import thread
import time

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error

import kvm_subprocess

"""
KVM test utility functions.

@copyright: 2008-2009 Red Hat Inc.
"""
12
13
def get_sub_dict(dict, name):
    """
    Return a "sub-dict" corresponding to a specific object.

    Operate on a copy of dict: for each key that ends with the suffix
    "_" + name, strip the suffix from the key, and set the value of
    the stripped key to that of the key. Return the resulting dict.

    @param dict: Dictionary to extract from; it is not modified.
    @param name: Object name; keys of the form "<key>_<name>" override
            the plain "<key>" entries in the result.
    @return: A new dict holding every original item plus the overrides.
    """
    suffix = "_" + name
    new_dict = dict.copy()
    for key, value in dict.items():
        if key.endswith(suffix):
            # Slice instead of split: the suffix text may also occur earlier
            # in the key, and only the trailing occurrence must be removed.
            new_dict[key[:-len(suffix)]] = value
    return new_dict
31
32
def get_sub_dict_names(dict, keyword):
    """
    Return a list of "sub-dict" names that may be extracted with get_sub_dict.

    This function may be modified to change the behavior of all functions that
    deal with multiple objects defined in dicts (e.g. VMs, images, NICs).

    @param dict: Dictionary holding the parameters.
    @param keyword: A key in dict (e.g. "vms", "images", "nics").
    @return: The whitespace-separated names stored under keyword, as a list;
            an empty list if the key is missing or its value is empty.
    """
    names = dict.get(keyword)
    if not names:
        return []
    return names.split()
47
48
# Functions related to MAC/IP addresses
50
def mac_str_to_int(addr):
    """
    Convert MAC address string to integer.

    @param addr: String representing the MAC address: hexadecimal groups
            separated by ":", most significant group first.
    @return: Integer value of the address (one byte per group).
    """
    value = 0
    # Fold the groups left to right; each group is one base-256 digit.
    for group in addr.split(":"):
        value = value * 256 + int(group, 16)
    return value
59
60
def mac_int_to_str(addr):
    """
    Convert MAC address integer to string.

    Only the low 48 bits of addr are used; higher bits are dropped.

    @param addr: Integer representing the MAC address.
    @return: Six lowercase two-digit hex groups joined by ":".
    """
    # Shifts 40, 32, ..., 0 select the bytes from most to least significant.
    groups = ["%02x" % ((addr >> shift) & 0xFF) for shift in range(40, -1, -8)]
    return ":".join(groups)
69
70
def ip_str_to_int(addr):
    """
    Convert IP address string to integer.

    @param addr: String representing the IP address: decimal groups
            separated by ".", most significant group first.
    @return: Integer value of the address (one byte per group).
    """
    value = 0
    # Fold the groups left to right; each group is one base-256 digit.
    for group in addr.split("."):
        value = value * 256 + int(group)
    return value
79
80
def ip_int_to_str(addr):
    """
    Convert IP address integer to string.

    Only the low 32 bits of addr are used; higher bits are dropped.

    @param addr: Integer representing the IP address.
    @return: Four decimal groups joined by ".".
    """
    # Shifts 24, 16, 8, 0 select the bytes from most to least significant.
    groups = [str((addr >> shift) & 0xFF) for shift in range(24, -1, -8)]
    return ".".join(groups)
89
90
def offset_mac(base, offset):
    """
    Add offset to a given MAC address.

    @param base: String representing a MAC address.
    @param offset: Offset to add to base (integer)
    @return: A string representing the offset MAC address.
    """
    shifted = mac_str_to_int(base) + offset
    return mac_int_to_str(shifted)
100
101
def offset_ip(base, offset):
    """
    Add offset to a given IP address.

    @param base: String representing an IP address.
    @param offset: Offset to add to base (integer)
    @return: A string representing the offset IP address.
    """
    shifted = ip_str_to_int(base) + offset
    return ip_int_to_str(shifted)
111
112
def get_mac_ip_pair_from_dict(dict):
    """
    Fetch a MAC-IP address pair from dict and return it.

    The parameters in dict are expected to conform to a certain syntax.
    Typical usage may be:

    address_ranges = r1 r2 r3

    address_range_base_mac_r1 = 55:44:33:22:11:00
    address_range_base_ip_r1 = 10.0.0.0
    address_range_size_r1 = 16

    address_range_base_mac_r2 = 55:44:33:22:11:40
    address_range_base_ip_r2 = 10.0.0.60
    address_range_size_r2 = 25

    address_range_base_mac_r3 = 55:44:33:22:12:10
    address_range_base_ip_r3 = 10.0.1.20
    address_range_size_r3 = 230

    address_index = 0

    All parameters except address_index specify a MAC-IP address pool. The
    pool consists of several MAC-IP address ranges.
    address_index specifies the index of the desired MAC-IP pair from the
    pool.

    @param dict: The dictionary from which to fetch the addresses.
    @return: A (mac, ip) tuple. An element is falsy (normally None) when the
            selected range does not define the corresponding base address;
            (None, None) is returned when address_index lies beyond the pool.
    """
    index = int(dict.get("address_index", 0))
    for range_name in get_sub_dict_names(dict, "address_ranges"):
        range_params = get_sub_dict(dict, range_name)
        size = int(range_params.get("address_range_size", 1))
        if index >= size:
            # Not in this range: make the index relative to the next one.
            index -= size
            continue
        mac_base = range_params.get("address_range_base_mac")
        ip_base = range_params.get("address_range_base_ip")
        # "base and f(base)" passes a missing/empty base through unchanged.
        return (mac_base and offset_mac(mac_base, index),
                ip_base and offset_ip(ip_base, index))
    return (None, None)
153
154
def verify_ip_address_ownership(ip, macs, timeout=3.0):
    """
    Connect to a given IP address and make sure its MAC address equals one of
    the given MAC address.

    The host ARP cache ("/sbin/arp -n") is checked first. If no matching
    entry is found, a non-blocking connection attempt is made to the address
    (presumably to make the host resolve it via ARP) and the cache is polled
    until timeout expires.

    @param ip: An IP address.
    @param macs: A list or tuple of MAC addresses.
    @param timeout: Time (seconds) to keep polling the ARP cache after the
            connection attempt.
    @return: True iff ip is assigned to a MAC address in macs.
    """
    if not macs:
        # With no candidates the alternation below would be empty and would
        # match any ARP line containing the IP.
        return False

    # Escape the literals (a "." in the IP must not match any character) and
    # apply IGNORECASE at compile time: re.search() cannot take flags
    # together with an already compiled pattern.
    mac_regex = "|".join(["(%s)" % re.escape(mac) for mac in macs])
    regex = re.compile(r"\b%s\b.*\b(%s)\b" % (re.escape(ip), mac_regex),
                       re.IGNORECASE)

    def check_arp_cache():
        output = commands.getoutput("/sbin/arp -n")
        return bool(regex.search(output))

    if check_arp_cache():
        return True

    s = socket.socket()
    try:
        s.setblocking(False)
        try:
            # The outcome of the connection itself is irrelevant.
            s.connect((ip, 55555))
        except socket.error:
            pass
        end_time = time.time() + timeout
        while time.time() < end_time:
            time.sleep(0.2)
            if check_arp_cache():
                return True
        return False
    finally:
        s.close()
188
189
# Functions for working with the environment (a dict-like object)
191
def is_vm(obj):
    """
    Tests whether a given object is a VM object.

    The check is done by class name only, so any object whose class is
    called "VM" qualifies (subclasses with a different name do not).

    @param obj: Python object (pretty much everything on python).
    @return: True if the class of obj is named "VM", False otherwise.
    """
    class_name = obj.__class__.__name__
    return class_name == "VM"
199
200
def env_get_all_vms(env):
    """
    Return a list of all VM objects on a given environment.

    @param env: Dictionary with environment items.
    @return: List of the values of env for which is_vm() is true.
    """
    return [obj for obj in env.values() if is_vm(obj)]
212
213
def env_get_vm(env, name):
    """
    Return a VM object by its name.

    @param env: Dictionary with environment items.
    @param name: VM name.
    @return: The object stored under the key "vm__<name>", or None if there
            is no such key.
    """
    key = "vm__%s" % name
    return env.get(key)
221
222
def env_register_vm(env, name, vm):
    """
    Register a given VM in a given env.

    The VM is stored under the key "vm__<name>", replacing any object
    previously stored under that key.

    @param env: Environment where we will register the VM.
    @param name: VM name.
    @param vm: VM object.
    """
    key = "vm__%s" % name
    env[key] = vm
232
233
def env_unregister_vm(env, name):
    """
    Remove a given VM from a given env.

    @param env: Environment where we will un-register the VM.
    @param name: VM name.
    @raise KeyError: If no VM is registered under that name.
    """
    key = "vm__%s" % name
    del env[key]
242
243
# Utility functions for dealing with external processes
245
def pid_exists(pid):
    """
    Return True if a given PID exists.

    The check is done by sending signal 0, which performs the kernel's
    existence/permission checks without delivering a signal.

    @param pid: Process ID number.
    @return: True if the process exists (even if it belongs to another user),
            False if it does not exist or pid is not a usable PID value.
    """
    try:
        os.kill(pid, 0)
    except OSError as e:
        # EPERM means the process is there but we may not signal it.
        return e.errno == errno.EPERM
    except Exception:
        # Invalid pid values (None, strings, out-of-range integers).
        return False
    return True
257
258
def safe_kill(pid, signal):
    """
    Attempt to send a signal to a given process that may or may not exist.

    @param pid: Process ID number.
    @param signal: Signal number.
    @return: True if the signal was sent, False if sending it failed for any
            reason (e.g. no such process, permission denied, invalid pid).
    """
    try:
        os.kill(pid, signal)
    except Exception:
        # Best effort by design, but let KeyboardInterrupt/SystemExit through.
        return False
    return True
270
271
def get_latest_kvm_release_tag(release_dir):
    """
    Fetches the latest release tag for KVM.

    The "showfiles.php" page under release_dir is retrieved through
    utils.unmap_url() and scanned for "kvm-<number>.tar.gz" file names; the
    largest number found is the latest release tag.

    @param release_dir: KVM source forge download location.
    @return: The latest release tag, as a string.
    @raise error.TestError: If the page cannot be fetched or read, or if it
            contains no release file names.
    """
    try:
        # NOTE(review): release_dir is a URL; os.path.join() only builds a
        # valid URL where the path separator is "/".
        page_url = os.path.join(release_dir, "showfiles.php")
        local_web_page = utils.unmap_url("/", page_url, "/tmp")
        f = open(local_web_page, "r")
        try:
            data = f.read()
        finally:
            f.close()
        # Dots are escaped so that only literal ".tar.gz" names match.
        rx = re.compile(r"kvm-(\d+)\.tar\.gz", re.IGNORECASE)
        matches = rx.findall(data)
        # In all regexp matches to something that looks like a release tag,
        # get the largest integer. That will be our latest release tag.
        # (max() raises ValueError when nothing matched; handled below.)
        latest_tag = max([int(x) for x in matches])
        return str(latest_tag)
    except Exception as e:
        message = "Could not fetch latest KVM release tag: %s" % str(e)
        logging.error(message)
        raise error.TestError(message)
294
295
def get_git_branch(repository, branch, srcdir, commit=None, lbranch=None):
    """
    Retrieves a given git code repository.

    The current working directory of the process is changed to srcdir and
    stays there after the function returns.

    @param repository: Git repository URL
    @param branch: Remote branch to fetch.
    @param srcdir: Directory that holds the working tree; it is created if
            it does not exist, and hard-reset if it already is a git
            repository.
    @param commit: Optional commit (or tag) to check out after the branch.
    @param lbranch: Name of the local branch that receives the fetched
            branch; defaults to branch.
    @return: srcdir.
    """
    logging.info("Fetching git [REP '%s' BRANCH '%s' TAG '%s'] -> %s",
                 repository, branch, commit, srcdir)
    if not os.path.exists(srcdir):
        os.makedirs(srcdir)
    os.chdir(srcdir)

    # Reuse an existing repository (dropping local changes) or start a new one.
    if os.path.exists(".git"):
        utils.system("git reset --hard")
    else:
        utils.system("git init")

    if not lbranch:
        lbranch = branch

    utils.system("git fetch -q -f -u -t %s %s:%s" %
                 (repository, branch, lbranch))
    utils.system("git checkout %s" % lbranch)
    if commit:
        utils.system("git checkout %s" % commit)

    # Log what was actually checked out.
    h = utils.system_output('git log --pretty=format:"%H" -1')
    desc = utils.system_output("git describe")
    logging.info("Commit hash for %s is %s (%s)", repository, h.strip(), desc)
    return srcdir
327
328
def unload_module(module_name):
    """
    Removes a module. Handles dependencies. If even then it's not possible
    to remove one of the modules, it will throw an error.CmdError exception.

    @param module_name: Name of the module we want to remove.
    """
    for line in utils.system_output("/sbin/lsmod").splitlines():
        line_parts = line.split()
        # Guard against blank lines, which have no first field.
        if line_parts and line_parts[0] == module_name:
            break
    else:
        logging.info("Module %s is already unloaded", module_name)
        return

    # A fourth lsmod column lists the modules using this one; they have to
    # be removed first.
    if len(line_parts) == 4:
        for submodule in line_parts[3].split(","):
            unload_module(submodule)
    utils.system("/sbin/modprobe -r %s" % module_name)
    logging.info("Module %s unloaded", module_name)
348
349
def check_kvm_source_dir(source_dir):
    """
    Inspects the kvm source directory and verifies its disposition. In some
    occasions build may be dependant on the source directory disposition.
    The reason why the return codes are numbers is that we might have more
    changes on the source directory layout, so it's not scalable to just use
    strings like 'old_repo', 'new_repo' and such.

    The current working directory of the process is changed to source_dir.

    @param source_dir: Source code path that will be inspected.
    @return: 1 if source_dir contains a 'qemu' directory but no 'kvm'
            directory, 2 if it contains a 'kvm' directory but no 'qemu'
            directory.
    @raise error.TestError: If both or neither of the directories exist.
    """
    os.chdir(source_dir)
    has_qemu_dir = os.path.isdir('qemu')
    has_kvm_dir = os.path.isdir('kvm')
    if has_qemu_dir and not has_kvm_dir:
        logging.debug("qemu directory detected, source dir layout 1")
        return 1
    if has_kvm_dir and not has_qemu_dir:
        logging.debug("kvm directory detected, source dir layout 2")
        return 2
    raise error.TestError("Unknown source dir layout, cannot proceed.")
371
372
# The following are functions used for SSH, SCP and Telnet communication with
# guests.

def remote_login(command, password, prompt, linesep="\n", timeout=10):
    """
    Log into a remote host (guest) using SSH or Telnet. Run the given command
    using kvm_subprocess.kvm_shell_session and provide answers to the
    questions asked. If timeout expires while waiting for output from the
    child (e.g. a password prompt or a shell prompt) -- fail.

    @brief: Log into a remote host (guest) using SSH or Telnet.

    @param command: The command to execute (e.g. "ssh root@localhost")
    @param password: The password to send in reply to a password prompt
    @param prompt: The shell prompt that indicates a successful login
    @param linesep: The line separator to send instead of "\\n"
            (sometimes "\\r\\n" is required)
    @param timeout: The maximal time duration (in seconds) to wait for each
            step of the login procedure (i.e. the "Are you sure" prompt, the
            password prompt, the shell prompt, etc)

    @return Return the kvm_shell_session object on success and None on
            failure.
    """
    sub = kvm_subprocess.kvm_shell_session(command,
                                           linesep=linesep,
                                           prompt=prompt)

    # The index of the pattern matching the last output line is what
    # read_until_last_line_matches() hands back as "match".
    patterns = [r"[Aa]re you sure", r"[Pp]assword:\s*$", r"^\s*[Ll]ogin:\s*$",
                r"[Cc]onnection.*closed", r"[Cc]onnection.*refused", prompt]
    # Matches that end the login attempt unsuccessfully, and what to log.
    failures = {1: "Got password prompt again",
                2: "Got unexpected login prompt",
                3: "Got 'Connection closed'",
                4: "Got 'Connection refused'"}

    password_sent = False

    logging.debug("Trying to login with command '%s'" % command)

    while True:
        (match, text) = sub.read_until_last_line_matches(
                patterns, timeout=timeout, internal_timeout=0.5)
        if match == 0:  # "Are you sure you want to continue connecting"
            logging.debug("Got 'Are you sure...'; sending 'yes'")
            sub.sendline("yes")
            continue
        if match == 1 and not password_sent:  # first "password:" prompt
            # NOTE(review): the password is written to the debug log in
            # clear text.
            logging.debug("Got password prompt; sending '%s'" % password)
            sub.sendline(password)
            password_sent = True
            continue
        if match == 5:  # prompt
            logging.debug("Got shell prompt -- logged in")
            return sub
        # Everything else is a failure; no match at all (match == None)
        # means the timeout elapsed or the process terminated.
        logging.debug(failures.get(match,
                                   "Timeout elapsed or process terminated"))
        sub.close()
        return None
442
443
def remote_scp(command, password, timeout=300, login_timeout=10):
    """
    Run the given command using kvm_subprocess.kvm_expect and provide answers
    to the questions asked. If timeout expires while waiting for the transfer
    to complete, fail. If login_timeout expires while waiting for output from
    the child (e.g. a password prompt), fail.

    @brief: Transfer files using SCP, given a command line.

    @param command: The command to execute
            (e.g. "scp -r foobar root@localhost:/tmp/").
    @param password: The password to send in reply to a password prompt.
    @param timeout: The time duration (in seconds) to wait for the transfer
            to complete.
    @param login_timeout: The maximal time duration (in seconds) to wait for
            each step of the login procedure (i.e. the "Are you sure" prompt
            or the password prompt)

    @return: True if the transfer succeeds and False on failure.
    """
    sub = kvm_subprocess.kvm_expect(command)

    patterns = [r"[Aa]re you sure", r"[Pp]assword:\s*$", r"lost connection"]
    password_sent = False
    # NOTE(review): the longer transfer timeout only takes effect once a
    # password has been sent; without a password prompt login_timeout stays
    # in force for the whole run.
    _timeout = login_timeout

    logging.debug("Trying to login...")

    while True:
        (match, text) = sub.read_until_last_line_matches(
                patterns, timeout=_timeout, internal_timeout=0.5)
        if match == 0:  # "Are you sure you want to continue connecting"
            logging.debug("Got 'Are you sure...'; sending 'yes'")
            sub.sendline("yes")
            continue
        if match == 1 and not password_sent:  # first "password:" prompt
            # NOTE(review): the password is written to the debug log in
            # clear text.
            logging.debug("Got password prompt; sending '%s'" % password)
            sub.sendline(password)
            password_sent = True
            # Login is done; from now on we wait for the transfer itself.
            _timeout = timeout
            continue
        if match == 1:  # second "password:" prompt
            logging.debug("Got password prompt again")
            sub.close()
            return False
        if match == 2:  # "lost connection"
            logging.debug("Got 'lost connection'")
            sub.close()
            return False
        # match == None: the exit status decides whether the copy succeeded.
        logging.debug("Timeout elapsed or process terminated")
        status = sub.get_status()
        sub.close()
        return status == 0
500
def scp_to_remote(host, port, username, password, local_path, remote_path,
                  timeout=300):
    """
    Copy files to a remote host (guest).

    @param host: Hostname of the guest
    @param port: Port of the guest's SSH server (passed to scp -P)
    @param username: User that will be used to copy the files
    @param password: Host's password
    @param local_path: Path on the local machine where we are copying from
    @param remote_path: Path on the remote machine where we are copying to
    @param timeout: Time in seconds that we will wait before giving up to
            copy the files.

    @return: True on success and False on failure.
    """
    destination = "%s@%s:%s" % (username, host, remote_path)
    command = ("scp -o UserKnownHostsFile=/dev/null -r -P %s %s %s" %
               (port, local_path, destination))
    return remote_scp(command, password, timeout)
519
520
def scp_from_remote(host, port, username, password, remote_path, local_path,
                    timeout=300):
    """
    Copy files from a remote host (guest).

    @param host: Hostname of the guest
    @param port: Port of the guest's SSH server (passed to scp -P)
    @param username: User that will be used to copy the files
    @param password: Host's password
    @param remote_path: Path on the remote machine where we are copying from
    @param local_path: Path on the local machine where we are copying to
    @param timeout: Time in seconds that we will wait before giving up to copy
            the files.

    @return: True on success and False on failure.
    """
    source = "%s@%s:%s" % (username, host, remote_path)
    command = ("scp -o UserKnownHostsFile=/dev/null -r -P %s %s %s" %
               (port, source, local_path))
    return remote_scp(command, password, timeout)
539
540
def ssh(host, port, username, password, prompt, timeout=10):
    """
    Log into a remote host (guest) using SSH.

    @param host: Hostname of the guest
    @param port: Port of the guest's SSH server
    @param username: User that will be used to log into the host.
    @param password: Host's password
    @param prompt: The shell prompt that indicates a successful login
    @param timeout: Time in seconds that we will wait before giving up on
            logging into the host.

    @return: The session object returned by remote_login() on success and
            None on failure.
    """
    command = ("ssh -o UserKnownHostsFile=/dev/null -p %s %s@%s" %
               (port, username, host))
    return remote_login(command, password, prompt, "\n", timeout)
556
557
def telnet(host, port, username, password, prompt, timeout=10):
    """
    Log into a remote host (guest) using Telnet.

    @param host: Hostname of the guest
    @param port: Port of the guest's Telnet server
    @param username: User that will be used to log into the host.
    @param password: Host's password
    @param prompt: The shell prompt that indicates a successful login
    @param timeout: Time in seconds that we will wait before giving up on
            logging into the host.

    @return: The session object returned by remote_login() on success and
            None on failure.
    """
    command = "telnet -l %s %s %s" % (username, host, port)
    # Lines are sent with "\r\n" for Telnet sessions.
    return remote_login(command, password, prompt, "\r\n", timeout)
572
573
# The following are utility functions related to ports.
575
def is_sshd_running(host, port, timeout=10.0):
    """
    Connect to the given host and port and wait for output.
    Return True if the given host and port are responsive.

    Note that timeout only limits the wait for output; the connection
    attempt itself is a plain blocking connect.

    @param host: Host's hostname
    @param port: Host's port
    @param timeout: Time (seconds) before we giving up on checking the SSH
            daemon.

    @return: If output is available, return True. If timeout expires and no
            output was available, return False.
    """
    s = socket.socket()
    try:
        try:
            s.connect((host, port))
        except socket.error:
            # Can't connect -- return False
            logging.debug("Could not connect")
            return False
        s.setblocking(False)
        # Wait up to 'timeout' seconds
        end_time = time.time() + timeout
        while time.time() < end_time:
            time.sleep(0.1)
            try:
                # Try to receive some text
                data = s.recv(1024)
            except socket.error:
                # No text was available; try again
                continue
            if len(data) > 0:
                logging.debug("Success! got string %r" % data)
                return True
        # Timeout elapsed and no text was received
        logging.debug("Timeout")
        return False
    finally:
        try:
            # Fails on a socket that never connected or was already dropped
            # by the peer; that must not mask the result.
            s.shutdown(socket.SHUT_RDWR)
        except socket.error:
            pass
        s.close()
620
621
def is_port_free(port):
    """
    Return True if the given port is available for use.

    Availability is tested by trying to bind a socket to the port on
    "localhost"; the socket is closed again before returning.

    @param port: Port number
    @return: True if the bind succeeded, False if it failed with a socket
            error.
    """
    s = socket.socket()
    try:
        try:
            s.bind(("localhost", port))
        except socket.error:
            return False
        return True
    finally:
        # Always release the socket, also when bind fails unexpectedly.
        s.close()
637
638
def find_free_port(start_port, end_port):
    """
    Return a free port in the range [start_port, end_port).

    @param start_port: First port that will be checked.
    @param end_port: Port immediately after the last one that will be checked.
    @return: The lowest free port in the range, or None if there is none.
    """
    port = start_port
    while port < end_port:
        if is_port_free(port):
            return port
        port += 1
    return None
650
651
def find_free_ports(start_port, end_port, count):
    """
    Return count free ports in the range [start_port, end_port).

    @param start_port: First port that will be checked.
    @param end_port: Port immediately after the last one that will be checked.
    @param count: Number of free ports wanted.
    @return: List of free ports in ascending order. It holds fewer than
            count ports if the range does not contain enough free ones.
    """
    ports = []
    port = start_port
    # Stop as soon as enough ports were collected or the range is exhausted.
    while port < end_port and len(ports) < count:
        if is_port_free(port):
            ports.append(port)
        port += 1
    return ports
668
669
# The following are miscellaneous utility functions.
671
def generate_random_string(length):
    """
    Return a random string using alphanumeric characters.

    The characters are drawn from the ASCII letters and digits, independent
    of the current locale. The random module is used, so the result is not
    suitable for security purposes.

    @param length: length of the string that will be generated.
    @return: The random string; empty if length is zero or negative.
    """
    chars = string.ascii_letters + string.digits
    return "".join([random.choice(chars) for _ in range(length)])
684
685
def format_str_for_message(str):
    """
    Format str so that it can be appended to a message.
    If str consists of one line, prefix it with a space.
    If str consists of multiple lines, prefix it with a newline.

    Line endings are normalized to "\\n" and a trailing line ending is
    dropped.

    @param str: string that will be formatted.
    @return: The formatted string; empty if str contains no lines.
    """
    lines = str.splitlines()
    if not lines:
        return ""
    if len(lines) == 1:
        prefix = " "
    else:
        prefix = "\n"
    return prefix + "\n".join(lines)
703
704
def wait_for(func, timeout, first=0.0, step=1.0, text=None):
    """
    If func() evaluates to True before timeout expires, return the
    value of func(). Otherwise return None.

    The timeout is measured from the moment of the call, so it includes the
    initial sleep; func is not called at all if timeout has already expired
    once that sleep is over.

    @brief: Wait until func() evaluates to True.

    @param func: Callable without arguments that is polled.
    @param timeout: Timeout in seconds
    @param first: Time to sleep before first attempt
    @param step: Time to sleep between attempts in seconds
    @param text: Text to print while waiting, for debug purposes
    """
    start_time = time.time()
    end_time = start_time + timeout

    time.sleep(first)

    while time.time() < end_time:
        if text:
            logging.debug("%s (%f secs)", text, time.time() - start_time)

        output = func()
        if output:
            return output

        time.sleep(step)

    logging.debug("Timeout elapsed")
    return None
734
735
def md5sum_file(filename, size=None):
    """
    Calculate the md5sum of filename.
    If size is not None, limit to first size bytes.
    Throw exception if something is wrong with filename.
    Can be also implemented with bash one-liner (assuming size%1024==0):
    dd if=filename bs=1024 count=size/1024 | md5sum -

    @param filename: Path of the file that will have its md5sum calculated.
    @param size: Number of leading bytes to hash; None, or a value larger
            than the file, means the whole file.
    @return: md5sum of the file, as a hexadecimal string.
    """
    chunksize = 4096
    fsize = os.path.getsize(filename)
    # Compare against None explicitly so that size=0 hashes zero bytes.
    if size is None or size > fsize:
        size = fsize
    digest = hashlib.md5()
    f = open(filename, 'rb')
    try:
        while size > 0:
            data = f.read(min(chunksize, size))
            if len(data) == 0:
                # The file is shorter than its size was when we started.
                logging.debug("Nothing left to read but size=%d" % size)
                break
            digest.update(data)
            size -= len(data)
    finally:
        f.close()
    return digest.hexdigest()