[autotest] Allow gs_offloader to limit the number of files in a result folder
If the number of files is too big, try to compress each subfolder.
BUG=chromium:537756
TEST=unittest
Change-Id: I833c2664c27bc566d08a0354ea4f2695dec2ec36
Reviewed-on: https://chromium-review.googlesource.com/304090
Commit-Ready: Dan Shi <dshi@chromium.org>
Tested-by: Dan Shi <dshi@chromium.org>
Reviewed-by: Simran Basi <sbasi@chromium.org>
diff --git a/global_config.ini b/global_config.ini
index a92a87b..72b6b78 100644
--- a/global_config.ini
+++ b/global_config.ini
@@ -291,6 +291,8 @@
# The swarming instance that will be used for golo proxy
swarming_proxy:
+# Whether gs_offloader compresses sub-folders of an overfull result folder.
+gs_offloader_limit_file_count: False
[BUG_REPORTING]
gs_domain: https://storage.cloud.google.com/
diff --git a/site_utils/gs_offloader.py b/site_utils/gs_offloader.py
index d2c67ef..d4b5640 100755
--- a/site_utils/gs_offloader.py
+++ b/site_utils/gs_offloader.py
@@ -27,7 +27,9 @@
from optparse import OptionParser
import common
+from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils
+from autotest_lib.site_utils import job_directories
try:
# Does not exist, nor is needed, on moblab.
@@ -100,6 +102,12 @@
INVALID_GS_CHARS = ['[', ']', '*', '?', '#']
INVALID_GS_CHAR_RANGE = [(0x00, 0x1F), (0x7F, 0x84), (0x86, 0xFF)]
+# Entry count (as reported by `find`) at which sub-folders get compressed.
+MAX_FILE_COUNT = 500
+FOLDERS_NEVER_ZIP = ['debug', 'ssp_logs']  # Folder names never compressed.
+LIMIT_FILE_COUNT = global_config.global_config.get_config_value(
+        'CROS', 'gs_offloader_limit_file_count', type=bool, default=False)
+
class TimeoutException(Exception):
"""Exception raised by the timeout_handler."""
pass
@@ -205,6 +213,52 @@
shutil.move(src, dest)
+def limit_file_count(dir_entry):
+    """Compress sub-folders of a result directory holding too many files.
+
+    The entry count comes from `find`, so it includes sub-directories and
+    dir_entry itself. Once it reaches MAX_FILE_COUNT, each eligible folder is
+    replaced by <folder>.tgz next to it. For a test job the folders two levels
+    down are compressed, so first-level folders stay browsable; for a special
+    task the direct sub-folders are compressed.
+
+    @param dir_entry: Directory entry to be checked.
+    """
+    def _zippable(parent):
+        """Return sub-folders of parent that are allowed to be compressed."""
+        paths = [os.path.join(parent, d) for d in os.listdir(parent)
+                 if d not in FOLDERS_NEVER_ZIP]
+        # Only real directories: shutil.rmtree() rejects symlinks, and
+        # dangling links or special files cannot be listed.
+        return [p for p in paths
+                if os.path.isdir(p) and not os.path.islink(p)]
+
+    count = utils.run('find "%s" | wc -l' % dir_entry,
+                      ignore_status=True).stdout.strip()
+    try:
+        count = int(count)
+    except (ValueError, TypeError):
+        logging.warn('Fail to get the file count in folder %s.', dir_entry)
+        return
+    if count < MAX_FILE_COUNT:
+        return
+
+    folders = _zippable(dir_entry)
+    # A test job is zipped one level deeper than a special task.
+    if not re.match(job_directories.SPECIAL_TASK_PATTERN, dir_entry):
+        folders = [sub for folder in folders for sub in _zippable(folder)]
+
+    for folder in folders:
+        try:
+            zip_name = '%s.tgz' % folder
+            utils.run('tar -cz -C "%s" -f "%s" "%s"' %
+                      (os.path.dirname(folder), zip_name, os.path.basename(folder)))
+        except error.CmdError as e:
+            logging.error('Fail to compress folder %s. Error: %s', folder, e)
+            continue
+        shutil.rmtree(folder)
+
+
def correct_results_folder_permission(dir_entry):
"""Make sure the results folder has the right permission settings.
@@ -249,6 +303,9 @@
sanitize_dir(dir_entry)
+ if LIMIT_FILE_COUNT:
+ limit_file_count(dir_entry)
+
error = False
stdout_file = tempfile.TemporaryFile('w+')
stderr_file = tempfile.TemporaryFile('w+')
diff --git a/site_utils/gs_offloader_unittest.py b/site_utils/gs_offloader_unittest.py
index fa43901..9c98556 100644
--- a/site_utils/gs_offloader_unittest.py
+++ b/site_utils/gs_offloader_unittest.py
@@ -19,8 +19,9 @@
import gs_offloader
import job_directories
-from autotest_lib.client.common_lib import utils, time_utils
from autotest_lib.client.common_lib import global_config
+from autotest_lib.client.common_lib import time_utils
+from autotest_lib.client.common_lib import utils
from autotest_lib.scheduler import email_manager
@@ -829,6 +830,46 @@
shutil.rmtree(results_folder)
+    def check_limit_file_count(self, is_test_job=True):
+        """Check that sub-folders of an overfull result folder get compressed.
+
+        @param is_test_job: True for a test job layout, False for special task.
+        """
+        results_folder = tempfile.mkdtemp()
+        host_folder = os.path.join(
+                results_folder,
+                'lab1-host1' if is_test_job else 'hosts/lab1-host1/1-repair')
+        dir_entry = results_folder if is_test_job else host_folder
+        debug_folder = os.path.join(host_folder, 'debug')
+        sysinfo_folder = os.path.join(host_folder, 'sysinfo')
+        max_file_count = gs_offloader.MAX_FILE_COUNT
+        try:
+            for folder in [debug_folder, sysinfo_folder]:
+                os.makedirs(folder)
+                for i in range(10):
+                    with open(os.path.join(folder, str(i)), 'w') as f:
+                        f.write('test')
+            gs_offloader.MAX_FILE_COUNT = 100  # Limit above the entry count.
+            gs_offloader.limit_file_count(dir_entry)
+            self.assertTrue(os.path.exists(sysinfo_folder))
+            gs_offloader.MAX_FILE_COUNT = 10  # Limit below the entry count.
+            gs_offloader.limit_file_count(dir_entry)
+            self.assertFalse(os.path.exists(sysinfo_folder))
+            self.assertTrue(os.path.exists(sysinfo_folder + '.tgz'))
+            self.assertTrue(os.path.exists(debug_folder))
+        finally:
+            # Undo the patched limit and drop the fixture even on failure.
+            gs_offloader.MAX_FILE_COUNT = max_file_count
+            shutil.rmtree(results_folder)
+
+
+    def test_limit_file_count(self):
+        """Check folder compression for a test job and for a special task."""
+        # Test job layout first, then the special task layout.
+        for is_test_job in (True, False):
+            self.check_limit_file_count(is_test_job=is_test_job)
+
+
class JobDirectoryOffloadTests(_TempResultsDirTestBase):
"""Tests for `_JobDirectory.enqueue_offload()`.
diff --git a/site_utils/job_directories.py b/site_utils/job_directories.py
index 93c27f3..9839224 100755
--- a/site_utils/job_directories.py
+++ b/site_utils/job_directories.py
@@ -16,6 +16,9 @@
_AFE = frontend_wrappers.RetryingAFE()
+SPECIAL_TASK_PATTERN = r'.*/hosts/[^/]+/(\d+)-[^/]+'  # group 1: numeric ID
+JOB_PATTERN = r'.*/(\d+)-[^/]+'  # group 1: numeric ID
+
def _is_job_expired(age_limit, timestamp):
"""Check whether a job timestamp is older than an age limit.
@@ -49,17 +52,15 @@
if not result_dir:
return
result_dir = os.path.abspath(result_dir)
- special_task_pattern = '.*/hosts/[^/]+/(\d+)-[^/]+'
- job_pattern = '.*/(\d+)-[^/]+'
# Result folder for job running inside container has only job id.
ssp_job_pattern = '.*/(\d+)$'
# Try to get the job ID from the last pattern of number-text. This avoids
# issue with path like 123-results/456-debug_user, in which 456 is the real
# job ID.
- m_job = re.findall(job_pattern, result_dir)
+ m_job = re.findall(JOB_PATTERN, result_dir)
if m_job:
return int(m_job[-1])
- m_special_task = re.match(special_task_pattern, result_dir)
+ m_special_task = re.match(SPECIAL_TASK_PATTERN, result_dir)
if m_special_task:
return int(m_special_task.group(1))
m_ssp_job_pattern = re.match(ssp_job_pattern, result_dir)