Start monitoring temperature during tests.
There are two parts to this:
1) Added infrastructure to site_utils.py to get machine into defined state
and keep tests honest.
2) Change glbench binary to base its wait times on currently observed
temperatures.
This should make the test faster, as we can cut down on blind sleep times.
But I don't expect on its own to change the numbers.
BUG=chromium:344766
TEST=1) Ran graphics_GLBench on link.
2) Ran hardware_SsdDetection and firmware_TouchMTB on link to ensure
get_board() still works for them.
Change-Id: I3cb908a35d911a3f9ca6ef194aa5a3d1ac3e2152
Reviewed-on: https://chromium-review.googlesource.com/190290
Reviewed-by: Ilja Friedel <ihf@chromium.org>
Commit-Queue: Ilja Friedel <ihf@chromium.org>
Tested-by: Ilja Friedel <ihf@chromium.org>
diff --git a/client/bin/site_utils.py b/client/bin/site_utils.py
index 9f0a9f2..d734e90 100644
--- a/client/bin/site_utils.py
+++ b/client/bin/site_utils.py
@@ -471,7 +471,7 @@
cpu_usage_end = get_cpu_usage()
fraction_active_time = \
compute_active_cpu_time(cpu_usage_start, cpu_usage_end)
- logging.info('After waiting %.1fs CPU utilization is %f.',
+ logging.info('After waiting %.1fs CPU utilization is %.3f.',
time_passed, fraction_active_time)
if time_passed > timeout:
logging.warning('CPU did not become idle.')
@@ -482,7 +482,7 @@
return True
return False
- logging.info('Wait for idle CPU took %fs (utilization = %f).',
+ logging.info('Wait for idle CPU took %.1fs (utilization = %.3f).',
time_passed, fraction_active_time)
return True
@@ -498,14 +498,319 @@
logging.info(output)
-def wait_for_cool_cpu():
- # TODO(ihf): Implement this.
+def wait_for_cool_machine():
+ # TODO(ihf): Implement this. The concept of a cool machine is very
+ # architecture specific. We either need a good heuristic or a table of
+ # board specific temperatures.
+ time.sleep(1.0)
return True
-def wait_for_cool_idle_perf_machine():
- # Wait for 60 seconds for the CPU usage to fall under 10%.
- if not wait_for_idle_cpu(60, 0.1):
- return False
- return wait_for_cool_cpu()
+# System paths for machine performance state.
+_CPUINFO = '/proc/cpuinfo'
+_CPUINFO_MAX_FREQ_FMT = '/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq'
+_CPUINFO_MIN_FREQ_FMT = '/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq'
+_KERNEL_MAX = '/sys/devices/system/cpu/kernel_max'
+_LSB_RELEASE = '/etc/lsb-release'
+_MEMINFO = '/proc/meminfo'
+_SCALING_GOVERNOR_FMT = '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor'
+_TEMP_CRIT_FMT = '/sys/class/hwmon/hwmon0/temp%d_crit'
+_TEMP_INPUT_FMT = '/sys/class/hwmon/hwmon0/temp%d_input'
+# Same temperatures for daisy_spring.
+_TEMP_CRIT_VIRT = '/sys/devices/virtual/hwmon/hwmon1/temp1_crit'
+_TEMP_INPUT_VIRT = '/sys/devices/virtual/hwmon/hwmon1/temp1_input'
+_TEMP_SENSOR_RE = 'Reading temperature...([0-9]*)'
+
+def _get_line_from_file(path, line):
+ """
+ line can be an integer or
+ line can be a string that matches the beginning of the line
+ """
+ f = open(path)
+ if (isinstance(line, int)):
+ l = f.readline()
+ for _ in range(0, line):
+ l = f.readline()
+ return l
+ else:
+ for l in f:
+ if l.startswith(line):
+ return l
+ return None
+
+
+def _get_match_from_file(path, line, prefix, postfix):
+ """
+ Matches line in path and returns string between first prefix and postfix.
+ """
+ match = _get_line_from_file(path, line)
+ # Strip everything from front of line including prefix.
+ if prefix:
+ match = re.split(prefix, match)[1]
+ # Strip everything from back of string including first occurence of postfix.
+ if postfix:
+ match = re.split(postfix, match)[0]
+ return match
+
+
+def _get_float_from_file(path, line, prefix, postfix):
+ match = _get_match_from_file(path, line, prefix, postfix)
+ return float(match)
+
+
+def _get_int_from_file(path, line, prefix, postfix):
+ match = _get_match_from_file(path, line, prefix, postfix)
+ return int(match)
+
+
+def _get_hex_from_file(path, line, prefix, postfix):
+ match = _get_match_from_file(path, line, prefix, postfix)
+ return int(match, 16)
+
+
+def get_temperature_critical():
+ """
+ Returns temperature at which we will see some throttling in the system.
+ """
+ min_temperature = 1000.0
+ cpus = _get_number_cpus()
+ for cpu in range(1, cpus + 1):
+ path = _TEMP_CRIT_FMT % cpu
+ #TODO(ihf): Figure out a more consistent way to obtain temperatures.
+ if 'spring' in get_board():
+ path = _TEMP_CRIT_VIRT
+ temperature = _get_float_from_file(path, 0, None, None) * 0.001
+ min_temperature = min(temperature, min_temperature)
+ # Sanity check for real world values.
+ assert ((min_temperature > 60.0) and
+ (min_temperature < 150.0)), 'Unreasonable temperature.'
+ return min_temperature
+
+
+def get_temperature_input_max():
+ """
+ Returns the maximum currently observed temperature.
+ """
+ max_temperature = -1000.0
+ cpus = _get_number_cpus()
+ for cpu in range(1, cpus + 1):
+ path = _TEMP_INPUT_FMT % cpu
+ #TODO(ihf): Figure out a more consistent way to obtain temperatures.
+ if 'spring' in get_board():
+ path = _TEMP_INPUT_VIRT
+ temperature = _get_float_from_file(path, 0, None, None) * 0.001
+ max_temperature = max(temperature, max_temperature)
+ # Sanity check for real world values.
+ assert ((max_temperature > 10.0) and
+ (max_temperature < 150.0)), 'Unreasonable temperature.'
+ return max_temperature
+
+
+def get_ec_temperatures():
+ """
+ Uses ectool to return a list of all sensor temperatures in Celsius.
+ """
+ temperatures = []
+ # TODO(ihf): On my spring ectool temps all returns 200K for all sensors.
+ if 'spring' in get_board():
+ return temperatures
+ try:
+ full_cmd = 'ectool temps all'
+ lines = utils.system_output(full_cmd).splitlines()
+ for line in lines:
+ temperature = int(line.split(': ')[1]) - 273
+ temperatures.append(temperature)
+ except Exception:
+ logging.warn('Unable to read temperature sensors using ectool.')
+ for temperature in temperatures:
+ # Sanity check for real world values.
+ assert ((temperature > 10.0) and
+ (temperature < 150.0)), 'Unreasonable temperature.'
+ return temperatures
+
+
+def get_cpu_cache_size():
+ """
+ Returns the last level CPU cache size in kBytes.
+ """
+ cache_size = _get_int_from_file(_CPUINFO, 'cache size', ': ', ' KB')
+ # Sanity check.
+ assert cache_size >= 64, 'Unreasonably small cache.'
+ return cache_size
+
+
+def get_cpu_model_frequency():
+ """
+ Returns the model frequency from the CPU model name on Intel only. This
+ might be redundant with get_cpu_max_frequency. Unit is Hz.
+ """
+ frequency = _get_float_from_file(_CPUINFO, 'model name', ' @ ', 'GHz')
+ return 1.e9 * frequency
+
+
+def get_cpu_max_frequency():
+ """
+ Returns the largest of the max CPU core frequencies. The unit is Hz.
+ """
+ max_frequency = -1
+ cpus = _get_number_cpus()
+ for cpu in range(1, cpus + 1):
+ path = _CPUINFO_MAX_FREQ_FMT % (cpu + 1)
+ # Convert from kHz to Hz.
+ frequency = 1000 * _get_float_from_file(path, 0, None, None)
+ max_frequency = max(frequency, max_frequency)
+ # Sanity check.
+ assert max_frequency > 1e8, 'Unreasonably low CPU frequency.'
+ return max_frequency
+
+
+def get_cpu_min_frequency():
+ """
+ Returns the smallest of the minimum CPU core frequencies.
+ """
+ min_frequency = 1e20
+ cpus = _get_number_cpus()
+ for cpu in range(1, cpus + 1):
+ path = _CPUINFO_MIN_FREQ_FMT % cpu
+ frequency = _get_float_from_file(path, 0, None, None)
+ min_frequency = min(frequency, min_frequency)
+ # Sanity check.
+ assert min_frequency > 1e8, 'Unreasonably low CPU frequency.'
+ return min_frequency
+
+
+def get_cpu_model():
+ """
+ Returns the CPU model.
+ Only works on Intel.
+ """
+ cpu_model = _get_int_from_file(_CPUINFO, 'model\t', ': ', None)
+ return cpu_model
+
+
+def get_cpu_family():
+ """
+ Returns the CPU family.
+ Only works on Intel.
+ """
+ cpu_family = _get_int_from_file(_CPUINFO, 'cpu family\t', ': ', None)
+ return cpu_family
+
+
+def get_board():
+ """
+ Get the ChromeOS release board name from /etc/lsb-release.
+ """
+ f = open('/etc/lsb-release')
+ try:
+ return re.search('BOARD=(.*)', f.read()).group(1)
+ finally:
+ f.close()
+
+
+def get_board_with_frequency_and_memory():
+ """
+ Returns a board name modified with CPU frequency and memory size to
+ differentiate between different board variants. For instance
+ link -> link_1.8GHz_4GB.
+ """
+ board_name = get_board()
+ # Rounded to nearest GB and GHz.
+ memory = int(round(get_mem_total()/1024.0))
+ # Convert frequency to GHz with 1 digit accuracy after the decimal point.
+ frequency = int(round(get_cpu_max_frequency() * 1e-8)) * 0.1
+ board = "%s_%1.1fGHz_%dGB" % (board_name, frequency, memory)
+ return board
+
+
+def get_mem_total():
+ """
+ Returns the total memory available in the system in MBytes.
+ """
+ mem_total = _get_float_from_file(_MEMINFO, 'MemTotal:', 'MemTotal:', ' kB')
+ # Sanity check, all Chromebooks have at least 1GB of memory.
+ assert mem_total > 1024*1024, 'Unreasonable amount of memory.'
+ return mem_total / 1024
+
+
+def get_mem_free():
+ """
+ Returns the currently free memory in the system in MBytes.
+ """
+ mem_free = _get_float_from_file(_MEMINFO, 'MemFree:', 'MemFree:', ' kB')
+ return mem_free / 1024
+
+
+def get_kernel_max():
+ """
+ Returns content of kernel_max.
+ """
+ kernel_max = _get_int_from_file(_KERNEL_MAX, 0, None, None)
+ # Sanity check.
+ assert ((kernel_max > 0) and (kernel_max < 257)), 'Unreasonable kernel_max.'
+ return kernel_max
+
+
+def _get_number_cpus():
+ """
+ Returns the number of CPUs available.
+ """
+ # TODO(ihf): apparently get_kernel_max() + 1 is not right here due to
+ # hyperthreading. We only need this right now to get sensible
+ # temperature and frequency baselines. But to make the functions
+ # more useful this needs to be fixed.
+ return 1
+
+
+def set_high_performance_mode():
+ """
+ Sets the kernel governor mode to the highest setting.
+ Returns previous governor state.
+ """
+ original_governors = get_scaling_governor_states()
+ set_scaling_governors('performance')
+ return original_governors
+
+
+def set_scaling_governors(value):
+ """
+ Sets all scaling governor to string value.
+ Sample values: 'performance', 'interactive', 'ondemand', 'powersave'.
+ """
+ paths = _get_scaling_governor_paths()
+ for path in paths:
+ cmd = 'sudo echo %s > %s' % (value, path)
+ logging.info('Writing scaling governor mode \'%s\' -> %s', value, path)
+ utils.system(cmd)
+
+
+def _get_scaling_governor_paths():
+ """
+ Returns a list of paths to the governors.
+ """
+ cmd = 'ls /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'
+ paths = utils.system_output(cmd).splitlines()
+ logging.info(paths)
+ return paths
+
+
+def get_scaling_governor_states():
+ """
+ Returns a list of (performance governor path, current state) tuples.
+ """
+ paths = _get_scaling_governor_paths()
+ path_value_list = []
+ for path in paths:
+ value = _get_line_from_file(path, 0)
+ path_value_list.append((path, value))
+ return path_value_list
+
+
+def restore_scaling_governor_states(path_value_list):
+ """
+ Restores governor states. Inverse operation to get_scaling_governor_states.
+ """
+ for (path, value) in path_value_list:
+ cmd = 'sudo echo %s > %s' % (value, path)
+ utils.system(cmd)