ipv4: use a 64bit load/store in output path

The gcc compiler is smart enough to use a single load/store if we
memcpy(dptr, sptr, 8) on x86_64, regardless of
CONFIG_CC_OPTIMIZE_FOR_SIZE.

In the IP header, daddr immediately follows saddr; this won't change in
the future. We only need to make sure our flowi4 (saddr,daddr) fields
won't break the rule.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/flow.h b/include/net/flow.h
index a094477..9192d69 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -59,8 +59,11 @@
 #define flowi4_proto		__fl_common.flowic_proto
 #define flowi4_flags		__fl_common.flowic_flags
 #define flowi4_secid		__fl_common.flowic_secid
-	__be32			daddr;
+
+	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
+	__be32			daddr;
+
 	union flowi_uli		uli;
 #define fl4_sport		uli.ports.sport
 #define fl4_dport		uli.ports.dport