ipv6: take dst->__refcnt for insertion into fib6 tree

In the IPv6 routing code, a struct rt6_info is created for each static
route and each RTF_CACHE route and is inserted into the fib6 tree. In
both cases, the dst ref count is not taken.
As explained in the previous patch, this leads to the need for the dst
garbage collector.

This patch takes a ref count on the dst before inserting the route into
the fib6 tree and properly releases the dst when deleting it from the
fib6 tree, in preparation for fully getting rid of the dst gc later.

Also, correct the fib6_age() logic to check that dst->__refcnt is 1:
since the fib6 tree itself now holds one reference, a count of 1
indicates that no user is referencing the dst.

Also remove dst_hold() in vrf_rt6_create(), as ip6_dst_alloc() already
sets dst->__refcnt to 1.

Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bc1bc91b..908b711 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -354,7 +354,7 @@
 					int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
-					0, DST_OBSOLETE_FORCE_CHK, flags);
+					1, DST_OBSOLETE_FORCE_CHK, flags);
 
 	if (rt)
 		rt6_info_init(rt);
@@ -381,7 +381,9 @@
 				*p =  NULL;
 			}
 		} else {
-			dst_destroy((struct dst_entry *)rt);
+			dst_release(&rt->dst);
+			if (!(flags & DST_NOCACHE))
+				dst_destroy((struct dst_entry *)rt);
 			return NULL;
 		}
 	}
@@ -932,9 +934,9 @@
 EXPORT_SYMBOL(rt6_lookup);
 
 /* ip6_ins_rt is called with FREE table->tb6_lock.
-   It takes new route entry, the addition fails by any reason the
-   route is freed. In any case, if caller does not hold it, it may
-   be destroyed.
+ * It takes new route entry, the addition fails by any reason the
+ * route is released.
+ * Caller must hold dst before calling it.
  */
 
 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
@@ -957,6 +959,8 @@
 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 	struct mx6_config mxc = { .mx = NULL, };
 
+	/* Hold dst to account for the reference from the fib6 tree */
+	dst_hold(&rt->dst);
 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
 }
 
@@ -1049,6 +1053,7 @@
 		prev = cmpxchg(p, NULL, pcpu_rt);
 		if (prev) {
 			/* If someone did it before us, return prev instead */
+			dst_release(&pcpu_rt->dst);
 			dst_destroy(&pcpu_rt->dst);
 			pcpu_rt = prev;
 		}
@@ -1059,6 +1064,7 @@
 		 * since rt is going away anyway.  The next
 		 * dst_check() will trigger a re-lookup.
 		 */
+		dst_release(&pcpu_rt->dst);
 		dst_destroy(&pcpu_rt->dst);
 		pcpu_rt = rt;
 	}
@@ -1129,12 +1135,15 @@
 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
 
-		if (uncached_rt)
+		if (uncached_rt) {
+			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
+			 * No need for another dst_hold()
+			 */
 			rt6_uncached_list_add(uncached_rt);
-		else
+		} else {
 			uncached_rt = net->ipv6.ip6_null_entry;
-
-		dst_hold(&uncached_rt->dst);
+			dst_hold(&uncached_rt->dst);
+		}
 
 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
 		return uncached_rt;
@@ -1422,6 +1431,10 @@
 			 * invalidate the sk->sk_dst_cache.
 			 */
 			ip6_ins_rt(nrt6);
+			/* Release the reference taken in
+			 * ip6_rt_cache_alloc()
+			 */
+			dst_release(&nrt6->dst);
 		}
 	}
 }
@@ -1673,7 +1686,6 @@
 
 	rt->dst.flags |= DST_HOST;
 	rt->dst.output  = ip6_output;
-	atomic_set(&rt->dst.__refcnt, 1);
 	rt->rt6i_gateway  = fl6->daddr;
 	rt->rt6i_dst.addr = fl6->daddr;
 	rt->rt6i_dst.plen = 128;
@@ -2130,8 +2142,10 @@
 		dev_put(dev);
 	if (idev)
 		in6_dev_put(idev);
-	if (rt)
+	if (rt) {
+		dst_release(&rt->dst);
 		dst_free(&rt->dst);
+	}
 
 	return ERR_PTR(err);
 }
@@ -2160,8 +2174,10 @@
 
 	return err;
 out:
-	if (rt)
+	if (rt) {
+		dst_release(&rt->dst);
 		dst_free(&rt->dst);
+	}
 
 	return err;
 }
@@ -2398,7 +2414,7 @@
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
 	if (ip6_ins_rt(nrt))
-		goto out;
+		goto out_release;
 
 	netevent.old = &rt->dst;
 	netevent.new = &nrt->dst;
@@ -2411,6 +2427,12 @@
 		ip6_del_rt(rt);
 	}
 
+out_release:
+	/* Release the reference taken in
+	 * ip6_rt_cache_alloc()
+	 */
+	dst_release(&nrt->dst);
+
 out:
 	neigh_release(neigh);
 }
@@ -2760,8 +2782,6 @@
 	rt->rt6i_table = fib6_get_table(net, tb_id);
 	rt->dst.flags |= DST_NOCACHE;
 
-	atomic_set(&rt->dst.__refcnt, 1);
-
 	return rt;
 }
 
@@ -3186,6 +3206,7 @@
 
 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
 		if (err) {
+			dst_release(&rt->dst);
 			dst_free(&rt->dst);
 			goto cleanup;
 		}
@@ -3249,8 +3270,10 @@
 
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
-		if (nh->rt6_info)
+		if (nh->rt6_info) {
+			dst_release(&nh->rt6_info->dst);
 			dst_free(&nh->rt6_info->dst);
+		}
 		kfree(nh->mxc.mx);
 		list_del(&nh->next);
 		kfree(nh);