Add host attributes to AFE.  Nothing actually uses them; they're purely for users and scripts to use.  Initial implementation is minimal - new migration, new model, RPCs to set and delete, and inclusion of attributes in get_hosts() results.  No CLI or frontend support.

The change to get_hosts() is of primary interest here, since it involves a general problems that occurs many places in Autotest -- gathering relationships over many items.  This has been solved in two ways previously:
* inefficiently in most places, by doing a query for each object (i.e. get_hosts()).
* efficiently in one place (JobManager.populate_dependencies()), by doing a single query to get a pivot table and then postprocessing to gather relationships for each object.

This time, I went ahead and implemented a general solution, a generalized version of JobManager.populate_dependencies().  I removed populate_dependencies() and made get_jobs() use the new general version instead, and I made get_hosts() use the new version for all its relationships, including the new attributes relationship.  This should speed up get_hosts() considerably.  In a future change I'll apply this solution to get_test_views() over in TKO land, since that would also greatly benefit in performance.

Signed-off-by: Steve Howard <showard@google.com>


git-svn-id: http://test.kernel.org/svn/autotest/trunk@3102 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/frontend/afe/doctests/001_rpc_test.txt b/frontend/afe/doctests/001_rpc_test.txt
index db7ed02..08c1c6f 100644
--- a/frontend/afe/doctests/001_rpc_test.txt
+++ b/frontend/afe/doctests/001_rpc_test.txt
@@ -101,6 +101,7 @@
 ...           'labels': [],
 ...           'acls': ['Everyone'],
 ...           'platform': None,
+...           'attributes': {},
 ...           'invalid': 0,
 ...           'protection': 'No protection',
 ...           'locked_by': 'debug_user',
@@ -324,6 +325,19 @@
 ['Everyone']
 
 
+# host attributes
+
+>>> rpc_interface.set_host_attribute('color', 'red', hostname='host1')
+>>> data = rpc_interface.get_hosts(hostname='host1')
+>>> data[0]['attributes']
+{'color': 'red'}
+
+>>> rpc_interface.set_host_attribute('color', None, hostname='host1')
+>>> data = rpc_interface.get_hosts(hostname='host1')
+>>> data[0]['attributes']
+{}
+
+
 # job management
 # ############
 
diff --git a/frontend/afe/model_logic.py b/frontend/afe/model_logic.py
index 81ad9a8..35a8b50 100644
--- a/frontend/afe/model_logic.py
+++ b/frontend/afe/model_logic.py
@@ -2,6 +2,7 @@
 Extensions to Django's model logic.
 """
 
+import itertools
 from django.db import models as dbmodels, backend, connection
 from django.utils import datastructures
 from autotest_lib.frontend.afe import readonly_connection
@@ -159,13 +160,25 @@
 
 
     def add_join(self, query_set, join_table, join_key,
-                 local_join_key='id', join_condition='', suffix='',
-                 exclude=False, force_left_join=False):
-        table_name = self.model._meta.db_table
+                 join_condition='', suffix='', exclude=False,
+                 force_left_join=False):
+        """
+        Add a join to query_set.
+        @param join_table table to join to
+        @param join_key field referencing back to this model to use for the join
+        @param join_condition extra condition for the ON clause of the join
+        @param suffix suffix to add to join_table for the join alias
+        @param exclude if true, exclude rows that match this join (will use a
+        LEFT JOIN and an appropriate WHERE condition)
+        @param force_left_join - if true, a LEFT JOIN will be used instead of an
+        INNER JOIN regardless of other options
+        """
+        join_from_table = self.model._meta.db_table
+        join_from_key = self.model._meta.pk.name
         join_alias = join_table + suffix
         full_join_key = join_alias + '.' + join_key
-        full_join_condition = '%s = %s.%s' % (full_join_key, table_name,
-                                              local_join_key)
+        full_join_condition = '%s = %s.%s' % (full_join_key, join_from_table,
+                                              join_from_key)
         if join_condition:
             full_join_condition += ' AND (' + join_condition + ')'
         if exclude or force_left_join:
@@ -209,6 +222,108 @@
         return sql.replace('%', '%%')
 
 
+    def _custom_select_query(self, query_set, selects):
+        query_selects, where, params = query_set._get_sql_clause()
+        if query_set._distinct:
+            distinct = 'DISTINCT '
+        else:
+            distinct = ''
+        sql_query = 'SELECT ' + distinct + ','.join(selects) + where
+        cursor = readonly_connection.connection().cursor()
+        cursor.execute(sql_query, params)
+        return cursor.fetchall()
+
+
+    def _is_relation_to(self, field, model_class):
+        return field.rel and field.rel.to is model_class
+
+
+    def _determine_pivot_table(self, related_model):
+        """
+        Determine the pivot table for this relationship and return a tuple
+        (pivot_table, pivot_from_field, pivot_to_field).  See
+        _query_pivot_table() for more info.
+        Note -- this depends on Django model internals and will likely need to
+        be updated when we move to Django 1.x.
+        """
+        # look for a field on related_model relating to this model
+        for field in related_model._meta.fields:
+            if self._is_relation_to(field, self.model):
+                # many-to-one -- the related table itself is the pivot table
+                return (related_model._meta.db_table, field.column,
+                        related_model.objects.get_key_on_this_table())
+
+        for field in related_model._meta.many_to_many:
+            if self._is_relation_to(field, self.model):
+                # many-to-many
+                return (field.m2m_db_table(), field.m2m_reverse_name(),
+                        field.m2m_column_name())
+
+        # maybe this model has the many-to-many field
+        for field in self.model._meta.many_to_many:
+            if self._is_relation_to(field, related_model):
+                return (field.m2m_db_table(), field.m2m_column_name(),
+                        field.m2m_reverse_name())
+
+        raise ValueError('%s has no relation to %s' %
+                         (related_model, self.model))
+
+
+    def _query_pivot_table(self, id_list, pivot_table, pivot_from_field,
+                           pivot_to_field):
+        """
+        @param id_list list of IDs of self.model objects to include
+        @param pivot_table the name of the pivot table
+        @param pivot_from_field a field name on pivot_table referencing
+        self.model
+        @param pivot_to_field a field name on pivot_table referencing the
+        related model.
+        @returns a dict mapping each IDs from id_list to a list of IDs of
+        related objects.
+        """
+        query = """
+        SELECT %(from_field)s, %(to_field)s
+        FROM %(table)s
+        WHERE %(from_field)s IN (%(id_list)s)
+        """ % dict(from_field=pivot_from_field,
+                   to_field=pivot_to_field,
+                   table=pivot_table,
+                   id_list=','.join(str(id_) for id_ in id_list))
+        cursor = readonly_connection.connection().cursor()
+        cursor.execute(query)
+
+        related_ids = {}
+        for model_id, related_id in cursor.fetchall():
+            related_ids.setdefault(model_id, []).append(related_id)
+        return related_ids
+
+
+    def populate_relationships(self, model_objects, related_model,
+                               related_list_name):
+        """
+        For each instance in model_objects, add a field named related_list_name
+        listing all the related objects of type related_model.  related_model
+        must be in a many-to-one or many-to-many relationship with this model.
+        """
+        if not model_objects:
+            # if we don't bail early, we'll get a SQL error later
+            return
+        id_list = (item.id for item in model_objects)
+        pivot_table, pivot_from_field, pivot_to_field = (
+            self._determine_pivot_table(related_model))
+        related_ids = self._query_pivot_table(id_list, pivot_table,
+                                              pivot_from_field, pivot_to_field)
+
+        all_related_ids = list(set(itertools.chain(*related_ids.itervalues())))
+        related_objects_by_id = related_model.objects.in_bulk(all_related_ids)
+
+        for item in model_objects:
+            related_ids_for_item = related_ids.get(item.id, [])
+            related_objects = [related_objects_by_id[related_id]
+                               for related_id in related_ids_for_item]
+            setattr(item, related_list_name, related_objects)
+
+
 class ValidObjectsManager(ExtendedManager):
     """
     Manager returning only objects with invalid=False.
diff --git a/frontend/afe/models.py b/frontend/afe/models.py
index 836a773..053680d 100644
--- a/frontend/afe/models.py
+++ b/frontend/afe/models.py
@@ -322,6 +322,20 @@
         return active[0]
 
 
+    def set_attribute(self, attribute, value):
+        attribute_object = HostAttribute.objects.get_or_create(
+            host=self, attribute=attribute)[0]
+        attribute_object.value = value
+        attribute_object.save()
+
+
+    def delete_attribute(self, attribute):
+        try:
+            HostAttribute.objects.get(host=self, attribute=attribute).delete()
+        except HostAttribute.DoesNotExist:
+            pass
+
+
     class Meta:
         db_table = 'hosts'
 
@@ -340,6 +354,18 @@
         return self.hostname
 
 
+class HostAttribute(dbmodels.Model):
+    """Arbitrary keyvals associated with hosts."""
+    host = dbmodels.ForeignKey(Host)
+    attribute = dbmodels.CharField(maxlength=90)
+    value = dbmodels.CharField(maxlength=300)
+
+    objects = model_logic.ExtendedManager()
+
+    class Meta:
+        db_table = 'host_attributes'
+
+
 class Test(dbmodels.Model, model_logic.ModelExtensions):
     """\
     Required:
@@ -598,27 +624,6 @@
         return all_job_counts
 
 
-    def populate_dependencies(self, jobs):
-        if not jobs:
-            return
-        job_ids = ','.join(str(job['id']) for job in jobs)
-        cursor = connection.cursor()
-        cursor.execute("""
-            SELECT jobs.id, labels.name
-            FROM jobs
-            INNER JOIN jobs_dependency_labels
-              ON jobs.id = jobs_dependency_labels.job_id
-            INNER JOIN labels ON jobs_dependency_labels.label_id = labels.id
-            WHERE jobs.id IN (%s)
-            """ % job_ids)
-        job_dependencies = {}
-        for job_id, dependency in cursor.fetchall():
-            job_dependencies.setdefault(job_id, []).append(dependency)
-        for job in jobs:
-            dependencies = ','.join(job_dependencies.get(job['id'], []))
-            job['dependencies'] = dependencies
-
-
 class Job(dbmodels.Model, model_logic.ModelExtensions):
     """\
     owner: username of job owner
diff --git a/frontend/afe/rpc_interface.py b/frontend/afe/rpc_interface.py
index 7e83725..c95f231 100644
--- a/frontend/afe/rpc_interface.py
+++ b/frontend/afe/rpc_interface.py
@@ -122,6 +122,24 @@
     models.Host.smart_get(id).labels.remove(*labels)
 
 
+def set_host_attribute(attribute, value, **host_filter_data):
+    """
+    @param attribute string name of attribute
+    @param value string, or None to delete an attribute
+    @param host_filter_data filter data to apply to Hosts to choose hosts to act
+    upon
+    """
+    assert host_filter_data # disallow accidental actions on all hosts
+    hosts = models.Host.query_objects(host_filter_data)
+    models.AclGroup.check_for_acl_violation_hosts(hosts)
+
+    for host in hosts:
+        if value is None:
+            host.delete_attribute(attribute)
+        else:
+            host.set_attribute(attribute, value)
+
+
 def delete_host(id):
     models.Host.smart_get(id).delete()
 
@@ -137,13 +155,21 @@
     hosts = rpc_utils.get_host_query(multiple_labels,
                                      exclude_only_if_needed_labels,
                                      filter_data)
+    hosts = list(hosts)
+    models.Host.objects.populate_relationships(hosts, models.Label,
+                                               'label_list')
+    models.Host.objects.populate_relationships(hosts, models.AclGroup,
+                                               'acl_list')
+    models.Host.objects.populate_relationships(hosts, models.HostAttribute,
+                                               'attribute_list')
     host_dicts = []
     for host_obj in hosts:
         host_dict = host_obj.get_object_dict()
-        host_dict['labels'] = [label.name for label in host_obj.labels.all()]
-        platform = host_obj.platform()
-        host_dict['platform'] = platform and platform.name or None
-        host_dict['acls'] = [acl.name for acl in host_obj.aclgroup_set.all()]
+        host_dict['labels'] = [label.name for label in host_obj.label_list]
+        host_dict['platform'] = rpc_utils.find_platform(host_obj)
+        host_dict['acls'] = [acl.name for acl in host_obj.acl_list]
+        host_dict['attributes'] = dict((attribute.attribute, attribute.value)
+                                       for attribute in host_obj.attribute_list)
         host_dicts.append(host_dict)
     return rpc_utils.prepare_for_serialization(host_dicts)
 
@@ -448,9 +474,16 @@
     filter_data['extra_args'] = rpc_utils.extra_job_filters(not_yet_run,
                                                             running,
                                                             finished)
-    jobs = models.Job.list_objects(filter_data)
-    models.Job.objects.populate_dependencies(jobs)
-    return rpc_utils.prepare_for_serialization(jobs)
+    job_dicts = []
+    jobs = list(models.Job.query_objects(filter_data))
+    models.Job.objects.populate_relationships(jobs, models.Label,
+                                              'dependencies')
+    for job in jobs:
+        job_dict = job.get_object_dict()
+        job_dict['dependencies'] = ','.join(label.name
+                                            for label in job.dependencies)
+        job_dicts.append(job_dict)
+    return rpc_utils.prepare_for_serialization(job_dicts)
 
 
 def get_num_jobs(not_yet_run=False, running=False, finished=False,
diff --git a/frontend/afe/rpc_utils.py b/frontend/afe/rpc_utils.py
index 18c0c31..e04d372 100644
--- a/frontend/afe/rpc_utils.py
+++ b/frontend/afe/rpc_utils.py
@@ -443,3 +443,13 @@
     job.queue(all_host_objects, atomic_group=atomic_group,
               is_template=is_template)
     return job.id
+
+
+def find_platform(host):
+    platforms = [label.name for label in host.label_list if label.platform]
+    if not platforms:
+        return None
+    if len(platforms) > 1:
+        raise ValueError('Host %s has more than one platform: %s' %
+                         (host.hostname, ', '.join(platforms)))
+    return platforms[0]
diff --git a/frontend/migrations/033_add_host_attributes.py b/frontend/migrations/033_add_host_attributes.py
new file mode 100644
index 0000000..642ddf3
--- /dev/null
+++ b/frontend/migrations/033_add_host_attributes.py
@@ -0,0 +1,21 @@
+UP_SQL = """
+CREATE TABLE `host_attributes` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `host_id` integer NOT NULL,
+    `attribute` varchar(90) NOT NULL,
+    `value` varchar(300) NOT NULL,
+    FOREIGN KEY (host_id) REFERENCES hosts (id),
+    KEY (attribute)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1;
+"""
+
+DOWN_SQL = """
+DROP TABLE IF EXISTS host_attributes;
+"""
+
+def migrate_up(manager):
+    manager.execute_script(UP_SQL)
+
+
+def migrate_down(manager):
+    manager.execute_script(DOWN_SQL)