net: QDISC_STATE_RUNNING dont need atomic bit ops

__QDISC_STATE_RUNNING is always changed while qdisc lock is held.

We can avoid two atomic operations in xmit path, if we move this bit in
a new __state container.

Location of this __state container is carefully chosen so that fast path
only dirties one qdisc cache line.

THROTTLED bit could later be moved into this __state location too, to
avoid dirtying first qdisc cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9707dae..b3591e4a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,11 +23,17 @@
 };
 
 enum qdisc_state_t {
-	__QDISC_STATE_RUNNING,
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 };
 
+/*
+ * following bits are only changed while qdisc lock is held
+ */
+enum qdisc___state_t {
+	__QDISC___STATE_RUNNING,
+};
+
 struct qdisc_size_table {
 	struct list_head	list;
 	struct tc_sizespec	szopts;
@@ -72,23 +78,24 @@
 	unsigned long		state;
 	struct sk_buff_head	q;
 	struct gnet_stats_basic_packed bstats;
+	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
 	struct rcu_head     rcu_head;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
-	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
-	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	__clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 struct Qdisc_class_ops {