xen: events: Process event channel notifications in round-robin order.

Avoids a fairness issue resulting from domain 0 always processing the
lowest-numbered event channel first.

Fixes bug #1115 "Event channel port scanning unfair".

Bugzilla: http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=1115
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
[ijc: forward ported from linux-2.6.18-xen.hg 324:7fe1c6d02a2b
 various variables have different names in this tree:
  l1  -> pending_words
  l2  -> pending_bits
  l1i -> word_idx
  l2i -> bit_idx]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6befe62..75cc6f5 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1028,6 +1028,11 @@
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
 
 /*
+ * Clear the i least significant bits of w (i must be < BITS_PER_LONG)
+ */
+#define MASK_LSBS(w, i) ((w) & ((~0UL) << (i)))
+
+/*
  * Search the CPUs pending events bitmasks.  For each one found, map
  * the event number to an irq, and feed it into do_IRQ() for
  * handling.
@@ -1038,6 +1043,9 @@
  */
 static void __xen_evtchn_do_upcall(void)
 {
+	static unsigned int last_word_idx = BITS_PER_LONG - 1;
+	static unsigned int last_bit_idx = BITS_PER_LONG - 1;
+	int word_idx, bit_idx;
 	int cpu = get_cpu();
 	struct shared_info *s = HYPERVISOR_shared_info;
 	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
@@ -1056,17 +1064,50 @@
 		wmb();
 #endif
 		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		word_idx = last_word_idx;
+		bit_idx = last_bit_idx;
+
 		while (pending_words != 0) {
 			unsigned long pending_bits;
-			int word_idx = __ffs(pending_words);
-			pending_words &= ~(1UL << word_idx);
+			unsigned long words;
 
-			while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
-				int bit_idx = __ffs(pending_bits);
-				int port = (word_idx * BITS_PER_LONG) + bit_idx;
-				int irq = evtchn_to_irq[port];
+			word_idx = (word_idx + 1) % BITS_PER_LONG;
+			words = MASK_LSBS(pending_words, word_idx);
+
+			/*
+			 * If we masked out all events, wrap around to the
+			 * beginning.
+			 */
+			if (words == 0) {
+				word_idx = BITS_PER_LONG - 1;
+				bit_idx = BITS_PER_LONG - 1;
+				continue;
+			}
+			word_idx = __ffs(words);
+
+			do {
+				unsigned long bits;
+				int port, irq;
 				struct irq_desc *desc;
 
+				pending_bits = active_evtchns(cpu, s, word_idx);
+
+				bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+				bits = MASK_LSBS(pending_bits, bit_idx);
+
+				/* If we masked out all events, move on. */
+				if (bits == 0) {
+					bit_idx = BITS_PER_LONG - 1;
+					break;
+				}
+
+				bit_idx = __ffs(bits);
+
+				/* Process port. */
+				port = (word_idx * BITS_PER_LONG) + bit_idx;
+				irq = evtchn_to_irq[port];
+
 				mask_evtchn(port);
 				clear_evtchn(port);
 
@@ -1075,7 +1116,24 @@
 					if (desc)
 						generic_handle_irq_desc(irq, desc);
 				}
-			}
+
+				/*
+				 * If this is the final port processed, we'll
+				 * pick up here+1 next time.
+				 */
+				last_word_idx = word_idx;
+				last_bit_idx = bit_idx;
+
+			} while (bit_idx != BITS_PER_LONG - 1);
+
+			pending_bits = active_evtchns(cpu, s, word_idx);
+
+			/*
+			 * We handled all ports, so we can clear the
+			 * selector bit.
+			 */
+			if (pending_bits == 0)
+				pending_words &= ~(1UL << word_idx);
 		}
 
 		BUG_ON(!irqs_disabled());