[SK_BUFF]: Convert skb->tail to sk_buff_data_t

So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes
on 64bit architectures, allowing us to combine the 4 bytes hole left by the
layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4
64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN...
:-)

Many calculations that previously required that skb->{transport,network,
mac}_header be first converted to a pointer now can be done directly, being
meaningful as offsets or pointers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4a629ea..3a4cb24 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -605,7 +605,7 @@
 
 #define RTA_PUT_NOHDR(skb, attrlen, data) \
 ({	RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \
-	memset(skb->tail - (RTA_ALIGN(attrlen) - attrlen), 0, \
+	memset(skb_tail_pointer(skb) - (RTA_ALIGN(attrlen) - attrlen), 0, \
 	       RTA_ALIGN(attrlen) - attrlen); })
 
 #define RTA_PUT_U8(skb, attrtype, value) \
@@ -637,12 +637,12 @@
 	RTA_PUT(skb, attrtype, 0, NULL);
 
 #define RTA_NEST(skb, type) \
-({	struct rtattr *__start = (struct rtattr *) (skb)->tail; \
+({	struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
 	RTA_PUT(skb, type, 0, NULL); \
 	__start;  })
 
 #define RTA_NEST_END(skb, start) \
-({	(start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \
+({	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
 	(skb)->len; })
 
 #define RTA_NEST_CANCEL(skb, start) \