net: Embed hh_cache inside of struct neighbour.

Now that there is a one-to-one correspondance between neighbour
and hh_cache entries, we no longer need:

1) dynamic allocation
2) attachment to dst->hh
3) refcounting

Initialization of the hh_cache entry is indicated by hh_len
being non-zero, and such initialization is always done with
the neighbour's lock held as a writer.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7538237..5ccc0cb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -252,14 +252,7 @@
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
 struct hh_cache {
-	atomic_t	hh_refcnt;	/* number of users                   */
-/*
- * We want hh_output, hh_len, hh_lock and hh_data be a in a separate
- * cache line on SMP.
- * They are mostly read, but hh_refcnt may be changed quite frequently,
- * incurring cache line ping pongs.
- */
-	u16		hh_len ____cacheline_aligned_in_smp;
+	u16		hh_len;
 	u16		__pad;
 	int		(*hh_output)(struct sk_buff *skb);
 	seqlock_t	hh_lock;
@@ -273,12 +266,6 @@
 	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
 };
 
-static inline void hh_cache_put(struct hh_cache *hh)
-{
-	if (atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
-}
-
 /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
  * Alternative is:
  *   dev->hard_header_len ? (dev->hard_header_len +
diff --git a/include/net/dst.h b/include/net/dst.h
index e12ddfb..0dd7ccb 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -38,7 +38,6 @@
 	unsigned long		expires;
 	struct dst_entry	*path;
 	struct neighbour	*neighbour;
-	struct hh_cache		*hh;
 #ifdef CONFIG_XFRM
 	struct xfrm_state	*xfrm;
 #else
@@ -47,6 +46,14 @@
 	int			(*input)(struct sk_buff*);
 	int			(*output)(struct sk_buff*);
 
+	int			flags;
+#define DST_HOST		0x0001
+#define DST_NOXFRM		0x0002
+#define DST_NOPOLICY		0x0004
+#define DST_NOHASH		0x0008
+#define DST_NOCACHE		0x0010
+#define DST_NOCOUNT		0x0020
+
 	short			error;
 	short			obsolete;
 	unsigned short		header_len;	/* more space at head required */
@@ -62,7 +69,7 @@
 	 * (L1_CACHE_SIZE would be too much)
 	 */
 #ifdef CONFIG_64BIT
-	long			__pad_to_align_refcnt[1];
+	long			__pad_to_align_refcnt[2];
 #endif
 	/*
 	 * __refcnt wants to be on a different cache line from
@@ -71,13 +78,6 @@
 	atomic_t		__refcnt;	/* client references	*/
 	int			__use;
 	unsigned long		lastuse;
-	int			flags;
-#define DST_HOST		0x0001
-#define DST_NOXFRM		0x0002
-#define DST_NOPOLICY		0x0004
-#define DST_NOHASH		0x0008
-#define DST_NOCACHE		0x0010
-#define DST_NOCOUNT		0x0020
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6fe8c2c..bd8f9f0 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -108,7 +108,7 @@
 	__u8			dead;
 	seqlock_t		ha_lock;
 	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
-	struct hh_cache		*hh;
+	struct hh_cache		hh;
 	int			(*output)(struct sk_buff *skb);
 	const struct neigh_ops	*ops;
 	struct rcu_head		rcu;