1. Enable job tags to be passed from the server side (basically use the -t option available on the client side to be passed in from the server)
2. Add code to help run multiple jobs on the same machine starting at the same time (using jobs tags so that the results don't get clobbered up and barriers for synchronization)
3. Add a sample server side control file to demonstrate the usage of running multiple tests on the same client starting at the same time by running two "sleeptests".
Signed-off-by: Ashwin Ganti <aganti@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@1549 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/client/common_lib/utils.py b/client/common_lib/utils.py
index c6ec525..71cac1e 100644
--- a/client/common_lib/utils.py
+++ b/client/common_lib/utils.py
@@ -4,7 +4,7 @@
import os, pickle, random, re, select, shutil, signal, StringIO, subprocess
import sys, time, textwrap, urllib, urlparse
-import error
+import error, barrier
def read_one_line(filename):
@@ -339,6 +339,111 @@
if out[-1:] == '\n': out = out[:-1]
return out
+"""
+This function is used when there is a need to run more than one
+job simultaneously starting exactly at the same time. It basically returns
+a modified control file (containing the synchronization code prepended)
+whenever it is ready to run the control file. The synchronization
+is done using barriers to make sure that the jobs start at the same time.
+
+Here is how the synchronization is done to make sure that the tests
+start at exactly the same time on the client.
+sc_bar is a server barrier and s_bar, c_bar are the normal barriers
+
+ Job1 Job2 ...... JobN
+ Server: | sc_bar
+ Server: | s_bar ...... s_bar
+ Server: | at.run() at.run() ...... at.run()
+ ----------|------------------------------------------------------
+ Client | sc_bar
+ Client | c_bar c_bar ...... c_bar
+ Client | <run test> <run test> ...... <run test>
+
+
+PARAMS:
+ control_file : The control file which to which the above synchronization
+ code would be prepended to
+ host_name : The host name on which the job is going to run
+ host_num (non negative) : A number to identify the machine so that we have
+ different sets of s_bar_ports for each of the machines.
+ instance : The number of the job
+ num_jobs : Total number of jobs that are going to run in parallel with
+ this job starting at the same time
+ port_base : Port number that is used to derive the actual barrier ports.
+
+RETURN VALUE:
+ The modified control file.
+
+"""
+def get_sync_control_file(control, host_name, host_num,
+ instance, num_jobs, port_base=63100):
+ sc_bar_port = port_base
+ c_bar_port = port_base
+ if host_num < 0:
+ print "Please provide a non negative number for the host"
+ return None
+ s_bar_port = port_base + 1 + host_num # The set of s_bar_ports are
+ # the same for a given machine
+
+ sc_bar_timeout = 180
+ s_bar_timeout = c_bar_timeout = 120
+
+ # The barrier code snippet is prepended into the conrol file
+ # dynamically before at.run() is called finally.
+ control_new = []
+
+ # jobid is the unique name used to identify the processes
+ # trying to reach the barriers
+ jobid = "%s#%d" % (host_name, instance)
+
+ rendv = []
+ # rendvstr is a temp holder for the rendezvous list of the processes
+ for n in range(num_jobs):
+ rendv.append("'%s#%d'" % (host_name, n))
+ rendvstr = ",".join(rendv)
+
+ if instance == 0:
+ # Do the setup and wait at the server barrier
+ # Clean up the tmp and the control dirs for the first instance
+ control_new.append('if os.path.exists(job.tmpdir):')
+ control_new.append("\t system('umount -f %s > /dev/null"
+ "2> /dev/null' % job.tmpdir,"
+ "ignore_status=True)")
+ control_new.append("\t system('rm -rf ' + job.tmpdir)")
+ control_new.append(
+ 'b0 = job.barrier("%s", "sc_bar", %d, port=%d)'
+ % (jobid, sc_bar_timeout, sc_bar_port))
+ control_new.append(
+ 'b0.rendevous_servers("PARALLEL_MASTER", "%s")'
+ % jobid)
+
+ elif instance == 1:
+ # Wait at the server barrier to wait for instance=0
+ # process to complete setup
+ b0 = barrier.barrier("PARALLEL_MASTER", "sc_bar", sc_bar_timeout,
+ port=sc_bar_port)
+ b0.rendevous_servers("PARALLEL_MASTER", jobid)
+
+ if(num_jobs > 2):
+ b1 = barrier.barrier(jobid, "s_bar", s_bar_timeout,
+ port=s_bar_port)
+ b1.rendevous(rendvstr)
+
+ else:
+ # For the rest of the clients
+ b2 = barrier.barrier(jobid, "s_bar", s_bar_timeout, port=s_bar_port)
+ b2.rendevous(rendvstr)
+
+ # Client side barrier for all the tests to start at the same time
+ control_new.append('b1 = job.barrier("%s", "c_bar", %d, port=%d)'
+ % (jobid, c_bar_timeout, c_bar_port))
+ control_new.append("b1.rendevous(%s)" % rendvstr)
+
+ # Stick in the rest of the control file
+ control_new.append(control)
+
+ return "\n".join(control_new)
+
class CmdResult(object):
"""