locking/rwsem: add prio aware enhancement to rwsem wait list

Make insertion into the wait list prio aware: a lower prio value
means higher priority, so a higher priority task will get the lock
before lower priority tasks.

Only tasks whose priority is higher than DEFAULT_PRIO try to
preempt waiters already queued in the wait list.

To avoid starvation, add a count that records how many waiters
have preempted their way into the wait list. If the preempt count
exceeds MAX_PREEMPT_ALLOWED, fall back to simple FIFO queueing
until the wait list is empty.
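
For illustration, a minimal sketch of how rwsem_list_add_per_prio()
could implement these rules. The helper's body is not part of the
hunks below; the MAX_PREEMPT_ALLOWED value and the point where
m_count is reset are assumptions:

#ifdef CONFIG_RWSEM_PRIO_AWARE
/* Preemption budget per drain of the wait list; value is an assumption. */
#define MAX_PREEMPT_ALLOWED	3000

/* Returns true if the waiter ends up at the head of the wait list. */
static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
					   struct rw_semaphore *sem)
{
	struct list_head *head = &sem->wait_list;
	struct list_head *pos;
	struct rwsem_waiter *waiter;

	if (list_empty(head)) {
		/* Wait list drained: re-arm the preemption budget. */
		sem->m_count = 0;
		list_add_tail(&waiter_in->list, head);
		return true;
	}

	/*
	 * Only a task with priority higher than DEFAULT_PRIO (i.e. a
	 * lower prio value) may preempt, and only while fewer than
	 * MAX_PREEMPT_ALLOWED preemptions have been queued.
	 */
	if (waiter_in->task->prio < DEFAULT_PRIO &&
	    sem->m_count < MAX_PREEMPT_ALLOWED) {
		list_for_each(pos, head) {
			waiter = list_entry(pos, struct rwsem_waiter, list);
			if (waiter->task->prio > waiter_in->task->prio) {
				/* Insert before the first lower prio waiter. */
				list_add(&waiter_in->list, pos->prev);
				sem->m_count++;
				return &waiter_in->list == head->next;
			}
		}
	}

	/* Equal/lower priority or budget exhausted: plain FIFO. */
	list_add_tail(&waiter_in->list, head);
	return false;
}
#else
static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
					   struct rw_semaphore *sem)
{
	list_add_tail(&waiter_in->list, &sem->wait_list);
	return false;
}
#endif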

Change-Id: I4d5fe6a823a16c9762e2e2f416d34bdd701341c4
Signed-off-by: Maria Yu <aiquny@codeaurora.org>
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4112df..15be517 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -87,21 +87,13 @@
 	sem->owner = NULL;
 	osq_lock_init(&sem->osq);
 #endif
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+	sem->m_count = 0;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
 
-enum rwsem_waiter_type {
-	RWSEM_WAITING_FOR_WRITE,
-	RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-	struct list_head list;
-	struct task_struct *task;
-	enum rwsem_waiter_type type;
-};
-
 enum rwsem_wake_type {
 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
 	RWSEM_WAKE_READERS,	/* Wake readers only */
@@ -226,6 +218,7 @@
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk = current;
 	WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
@@ -233,7 +226,9 @@
 	raw_spin_lock_irq(&sem->wait_lock);
 	if (list_empty(&sem->wait_list))
 		adjustment += RWSEM_WAITING_BIAS;
-	list_add_tail(&waiter.list, &sem->wait_list);
+
+	/* is_first_waiter == true means we are first in the queue */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
@@ -246,7 +241,8 @@
 	 */
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
-	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
+	     (adjustment != -RWSEM_ACTIVE_READ_BIAS ||
+	     is_first_waiter)))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
@@ -462,6 +458,7 @@
 	struct rwsem_waiter waiter;
 	struct rw_semaphore *ret = sem;
 	WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
@@ -483,7 +480,11 @@
 	if (list_empty(&sem->wait_list))
 		waiting = false;
 
-	list_add_tail(&waiter.list, &sem->wait_list);
+	/*
+	 * is_first_waiter == true means we are first in the queue,
+	 * so there are no read locks queued ahead of us.
+	 */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
@@ -494,7 +495,7 @@
 		 * no active writers, the lock must be read owned; so we try to
 		 * wake any read locks that were queued ahead of us.
 		 */
-		if (count > RWSEM_WAITING_BIAS) {
+		if (!is_first_waiter && count > RWSEM_WAITING_BIAS) {
 			WAKE_Q(wake_q);
 
 			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
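
The first hunk removes enum rwsem_waiter_type and struct rwsem_waiter
from rwsem-xadd.c, which implies they move to a header shared with the
new helper; the header side is not shown in this excerpt. The wake
paths then use the return value: a reader queued at the head forces a
wake attempt even when adjustment == -RWSEM_ACTIVE_READ_BIAS, and a
writer queued at the head skips waking readers "ahead of us", since
there are none. A sketch of what the header could contain (file
placement and the m_count type are assumptions; the enum and struct
bodies are the ones removed above):

/* e.g. kernel/locking/rwsem.h (assumed location) */
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

/* In include/linux/rwsem.h, struct rw_semaphore would gain (assumed): */
#ifdef CONFIG_RWSEM_PRIO_AWARE
	long m_count;	/* waiters that preempted into the wait list */
#endif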