net: Embed hh_cache inside of struct neighbour.

Now that there is a one-to-one correspondance between neighbour
and hh_cache entries, we no longer need:

1) dynamic allocation
2) attachment to dst->hh
3) refcounting

Initialization of the hh_cache entry is indicated by hh_len
being non-zero, and such initialization is always done with
the neighbour's lock held as a writer.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7538237..5ccc0cb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -252,14 +252,7 @@
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
 struct hh_cache {
-	atomic_t	hh_refcnt;	/* number of users                   */
-/*
- * We want hh_output, hh_len, hh_lock and hh_data be a in a separate
- * cache line on SMP.
- * They are mostly read, but hh_refcnt may be changed quite frequently,
- * incurring cache line ping pongs.
- */
-	u16		hh_len ____cacheline_aligned_in_smp;
+	u16		hh_len;
 	u16		__pad;
 	int		(*hh_output)(struct sk_buff *skb);
 	seqlock_t	hh_lock;
@@ -273,12 +266,6 @@
 	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
 };
 
-static inline void hh_cache_put(struct hh_cache *hh)
-{
-	if (atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
-}
-
 /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
  * Alternative is:
  *   dev->hard_header_len ? (dev->hard_header_len +