xfrm: Move dst->path into struct xfrm_dst

The first member of an IPSEC route bundle chain sets it's dst->path to
the underlying ipv4/ipv6 route that carries the bundle.

Stated another way, if one were to follow the xfrm_dst->child chain of
the bundle, the final non-NULL pointer would be the path and point to
either an ipv4 or an ipv6 route.

This is largely used to make sure that PMTU events propagate down to
the correct ipv4 or ipv6 route.

When we don't have the top of an IPSEC bundle 'dst->path == dst'.

Move it down into xfrm_dst and key off of dst->xfrm.

Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
diff --git a/include/net/dst.h b/include/net/dst.h
index 13c839d..424bff6 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -38,7 +38,6 @@
 	struct  dst_ops	        *ops;
 	unsigned long		_metrics;
 	unsigned long           expires;
-	struct dst_entry	*path;
 #ifdef CONFIG_XFRM
 	struct xfrm_state	*xfrm;
 #else
@@ -87,7 +86,7 @@
 	 * Align __refcnt to a 64 bytes alignment
 	 * (L1_CACHE_SIZE would be too much)
 	 */
-	long			__pad_to_align_refcnt[4];
+	long			__pad_to_align_refcnt[5];
 #endif
 	/*
 	 * __refcnt wants to be on a different cache line from
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0009dab..1ec0c47 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -985,6 +985,7 @@
 	} u;
 	struct dst_entry *route;
 	struct dst_entry *child;
+	struct dst_entry *path;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
 	u32 xfrm_genid;
@@ -995,6 +996,18 @@
 	u32 path_cookie;
 };
 
+static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
+{
+#ifdef CONFIG_XFRM
+	if (dst->xfrm) {
+		const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
+
+		return xdst->path;
+	}
+#endif
+	return (struct dst_entry *) dst;
+}
+
 static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
 {
 #ifdef CONFIG_XFRM
@@ -1889,7 +1902,7 @@
 		return false;
 
 	xdst = (struct xfrm_dst *) dst;
-	if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
+	if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
 	    !xdst->child->xfrm)
 		return true;
 
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 20cbb72..8e2d7cf 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -78,7 +78,6 @@
 
 	atomic_set(&rt->dst.__refcnt, 1);
 	rt->dst.dev = br->dev;
-	rt->dst.path = &rt->dst;
 	dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
 	rt->dst.flags	= DST_NOXFRM | DST_FAKE_RTABLE;
 	rt->dst.ops = &fake_dst_ops;
diff --git a/net/core/dst.c b/net/core/dst.c
index cf2076c..9bc3bb6 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -69,7 +69,6 @@
 	dst->ops = ops;
 	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 	dst->expires = 0UL;
-	dst->path = dst;
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af..f0ed031 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1106,7 +1106,7 @@
 		new = true;
 	}
 
-	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
 
 	if (!dst_check(&rt->dst, 0)) {
 		if (new)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a41..176d74f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1201,13 +1201,13 @@
 		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
 	else
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+		      rt->dst.dev->mtu : dst_mtu(xfrm_dst_path(&rt->dst));
 	if (np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
 	cork->base.fragsize = mtu;
-	if (dst_allfrag(rt->dst.path))
+	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
 		cork->base.flags |= IPCORK_ALLFRAG;
 	cork->base.length = 0;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1f1ef1e..46fd53b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4596,8 +4596,6 @@
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_entries;
-	net->ipv6.ip6_null_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
 			 ip6_template_metrics, true);
@@ -4609,8 +4607,6 @@
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
 			 ip6_template_metrics, true);
@@ -4620,8 +4616,6 @@
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->dst.path =
-		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
 			 ip6_template_metrics, true);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index c5851dd..c61a7d4 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -120,7 +120,7 @@
 	if (!x->type_offload || x->encap)
 		return false;
 
-	if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
+	if ((x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev)) &&
 	     !xdst->child->xfrm && x->type->get_mtu) {
 		mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 58b3ecb..da1b41e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1626,7 +1626,7 @@
 	}
 
 	xfrm_dst_set_child(xdst_prev, dst);
-	xdst0->u.dst.path = dst;
+	xdst0->path = dst;
 
 	err = -ENODEV;
 	dev = dst->dev;
@@ -1879,8 +1879,8 @@
 	xfrm_decode_session(skb, &fl, dst->ops->family);
 	spin_unlock(&pq->hold_queue.lock);
 
-	dst_hold(dst->path);
-	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
+	dst_hold(xfrm_dst_path(dst));
+	dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
 	if (IS_ERR(dst))
 		goto purge_queue;
 
@@ -1909,8 +1909,8 @@
 		skb = __skb_dequeue(&list);
 
 		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
-		dst_hold(skb_dst(skb)->path);
-		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
+		dst_hold(xfrm_dst_path(skb_dst(skb)));
+		dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
 		if (IS_ERR(dst)) {
 			kfree_skb(skb);
 			continue;
@@ -2012,7 +2012,7 @@
 
 	dst_hold(dst);
 	xfrm_dst_set_child(xdst, dst);
-	dst1->path = dst;
+	xdst->path = dst;
 
 	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
 
@@ -2630,7 +2630,7 @@
 	struct xfrm_dst *last;
 	u32 mtu;
 
-	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+	if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
 	    (dst->dev && !netif_running(dst->dev)))
 		return 0;
 
@@ -2691,22 +2691,20 @@
 
 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 {
-	return dst_metric_advmss(dst->path);
+	return dst_metric_advmss(xfrm_dst_path(dst));
 }
 
 static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	return mtu ? : dst_mtu(dst->path);
+	return mtu ? : dst_mtu(xfrm_dst_path(dst));
 }
 
 static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
 					const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
-
-	for (; dst != path; dst = xfrm_dst_child(dst)) {
+	while (dst->xfrm) {
 		const struct xfrm_state *xfrm = dst->xfrm;
 
 		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@ -2715,6 +2713,8 @@
 			daddr = xfrm->coaddr;
 		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
 			daddr = &xfrm->id.daddr;
+
+		dst = xfrm_dst_child(dst);
 	}
 	return daddr;
 }
@@ -2723,7 +2723,7 @@
 					   struct sk_buff *skb,
 					   const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
+	const struct dst_entry *path = xfrm_dst_path(dst);
 
 	if (!skb)
 		daddr = xfrm_get_dst_nexthop(dst, daddr);
@@ -2732,7 +2732,7 @@
 
 static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 {
-	const struct dst_entry *path = dst->path;
+	const struct dst_entry *path = xfrm_dst_path(dst);
 
 	daddr = xfrm_get_dst_nexthop(dst, daddr);
 	path->ops->confirm_neigh(path, daddr);