[NET]: Supporting UDP-Lite (RFC 3828) in Linux

This is a revision of the previously submitted patch, which alters
the way files are organized and compiled in the following manner:

	* UDP and UDP-Lite now use separate object files
	* source file dependencies resolved via header files
	  net/ipv{4,6}/udp_impl.h
	* order of inclusion files in udp.c/udplite.c adapted
	  accordingly

[NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828)

This patch adds support for UDP-Lite to the IPv4 stack, provided as an
extension to the existing UDPv4 code:
        * generic routines are all located in net/ipv4/udp.c
        * UDP-Lite specific routines are in net/ipv4/udplite.c
        * MIB/statistics support in /proc/net/snmp and /proc/net/udplite
        * shared API with extensions for partial checksum coverage

[NET/IPv6]: Extension for UDP-Lite over IPv6

It extends the existing UDPv6 code base with support for UDP-Lite
in the same manner as per UDPv4. In particular,
        * UDPv6 generic and shared code is in net/ipv6/udp.c
        * UDP-Litev6 specific extensions are in net/ipv6/udplite.c
        * MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6
        * support for IPV6_ADDRFORM
        * aligned the coding style of protocol initialisation with af_inet6.c
        * made the error handling in udpv6_queue_rcv_skb consistent;
          to return `-1' on error on all error cases
        * consolidation of shared code

[NET]: UDP-Lite Documentation and basic XFRM/Netfilter support

The UDP-Lite patch further provides
        * API documentation for UDP-Lite
        * basic xfrm support
        * basic netfilter support for IPv4 and IPv6 (LOG target)

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/in.h b/include/linux/in.h
index 2619859..1912e7c 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -45,6 +45,7 @@
 
   IPPROTO_COMP   = 108,                /* Compression Header protocol */
   IPPROTO_SCTP   = 132,		/* Stream Control Transport Protocol	*/
+  IPPROTO_UDPLITE = 136,	/* UDP-Lite (RFC 3828)			*/
 
   IPPROTO_RAW	 = 255,		/* Raw IP packets			*/
   IPPROTO_MAX
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3614090..592b666 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -264,6 +264,7 @@
 #define SOL_IPV6	41
 #define SOL_ICMPV6	58
 #define SOL_SCTP	132
+#define SOL_UDPLITE	136     /* UDP-Lite (RFC 3828) */
 #define SOL_RAW		255
 #define SOL_IPX		256
 #define SOL_AX25	257
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 014b41d..564f3b0 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -38,6 +38,7 @@
 #include <linux/types.h>
 
 #include <net/inet_sock.h>
+#define UDP_HTABLE_SIZE		128
 
 struct udp_sock {
 	/* inet_sock has to be the first member */
@@ -50,12 +51,23 @@
 	 * when the socket is uncorked.
 	 */
 	__u16		 len;		/* total length of pending frames */
+	/*
+	 * Fields specific to UDP-Lite.
+	 */
+	__u16		 pcslen;
+	__u16		 pcrlen;
+/* indicator bits used by pcflag: */
+#define UDPLITE_BIT      0x1  		/* set by udplite proto init function */
+#define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
+#define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
+	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
 {
 	return (struct udp_sock *)sk;
 }
+#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
 
 #endif
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3c266ad..9390649 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -158,9 +158,13 @@
 	SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset);    	\
 })
 DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-#define UDP6_INC_STATS(field)		SNMP_INC_STATS(udp_stats_in6, field)
-#define UDP6_INC_STATS_BH(field)	SNMP_INC_STATS_BH(udp_stats_in6, field)
-#define UDP6_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_stats_in6, field)
+DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
+#define UDP6_INC_STATS_BH(field, is_udplite) 			      do  {  \
+	if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field);         \
+	else		SNMP_INC_STATS_BH(udp_stats_in6, field);    } while(0)
+#define UDP6_INC_STATS_USER(field, is_udplite)			       do {    \
+	if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field);         \
+	else		SNMP_INC_STATS_USER(udp_stats_in6, field);    } while(0)
 
 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -604,6 +608,8 @@
 extern void tcp6_proc_exit(void);
 extern int  udp6_proc_init(void);
 extern void udp6_proc_exit(void);
+extern int  udplite6_proc_init(void);
+extern void udplite6_proc_exit(void);
 extern int  ipv6_misc_proc_init(void);
 extern void ipv6_misc_proc_exit(void);
 
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 61f724c..409da3a 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -11,6 +11,7 @@
 
 extern struct proto rawv6_prot;
 extern struct proto udpv6_prot;
+extern struct proto udplitev6_prot;
 extern struct proto tcpv6_prot;
 
 struct flowi;
@@ -24,6 +25,7 @@
 /* transport protocols */
 extern void				rawv6_init(void);
 extern void				udpv6_init(void);
+extern void 				udplitev6_init(void);
 extern void				tcpv6_init(void);
 
 extern int				udpv6_connect(struct sock *sk,
diff --git a/include/net/udp.h b/include/net/udp.h
index db0c05f..4f06267 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -26,9 +26,28 @@
 #include <net/inet_sock.h>
 #include <net/sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
 #include <linux/seq_file.h>
 
-#define UDP_HTABLE_SIZE		128
+/**
+ *	struct udp_skb_cb  -  UDP(-Lite) private variables
+ *
+ *	@header:      private variables used by IPv4/IPv6
+ *	@cscov:       checksum coverage length (UDP-Lite only)
+ *	@partial_cov: if set indicates partial csum coverage
+ */
+struct udp_skb_cb {
+	union {
+		struct inet_skb_parm	h4;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;
+	__u16		cscov;
+	__u8		partial_cov;
+};
+#define UDP_SKB_CB(__skb)	((struct udp_skb_cb *)((__skb)->cb))
 
 extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 extern rwlock_t udp_hash_lock;
@@ -47,6 +66,62 @@
 
 struct sk_buff;
 
+/*
+ *	Generic checksumming routines for UDP(-Lite) v4 and v6
+ */
+static inline u16  __udp_lib_checksum_complete(struct sk_buff *skb)
+{
+	if (! UDP_SKB_CB(skb)->partial_cov)
+		return __skb_checksum_complete(skb);
+	return  csum_fold(skb_checksum(skb, 0, UDP_SKB_CB(skb)->cscov,
+			  skb->csum));
+}
+
+static __inline__ int udp_lib_checksum_complete(struct sk_buff *skb)
+{
+	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+		__udp_lib_checksum_complete(skb);
+}
+
+/**
+ * 	udp_csum_outgoing  -  compute UDPv4/v6 checksum over fragments
+ * 	@sk: 	socket we are writing to
+ * 	@skb: 	sk_buff containing the filled-in UDP header
+ * 	        (checksum field must be zeroed out)
+ */
+static inline u32 udp_csum_outgoing(struct sock *sk, struct sk_buff *skb)
+{
+	u32 csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0);
+
+	skb_queue_walk(&sk->sk_write_queue, skb) {
+		csum = csum_add(csum, skb->csum);
+	}
+	return csum;
+}
+
+/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
+static inline void udp_lib_hash(struct sock *sk)
+{
+	BUG();
+}
+
+static inline void udp_lib_unhash(struct sock *sk)
+{
+	write_lock_bh(&udp_hash_lock);
+	if (sk_del_node_init(sk)) {
+		inet_sk(sk)->num = 0;
+		sock_prot_dec_use(sk->sk_prot);
+	}
+	write_unlock_bh(&udp_hash_lock);
+}
+
+static inline void udp_lib_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+
+/* net/ipv4/udp.c */
 extern int	udp_get_port(struct sock *sk, unsigned short snum,
 			     int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
@@ -61,21 +136,29 @@
 			     poll_table *wait);
 
 DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
-#define UDP_INC_STATS(field)		SNMP_INC_STATS(udp_statistics, field)
-#define UDP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(udp_statistics, field)
-#define UDP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(udp_statistics, field)
+/*
+ * 	SNMP statistics for UDP and UDP-Lite
+ */
+#define UDP_INC_STATS_USER(field, is_udplite)			       do {   \
+	if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field);       \
+	else		SNMP_INC_STATS_USER(udp_statistics, field);  }  while(0)
+#define UDP_INC_STATS_BH(field, is_udplite) 			       do  {  \
+	if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field);         \
+	else		SNMP_INC_STATS_BH(udp_statistics, field);    }  while(0)
 
 /* /proc */
 struct udp_seq_afinfo {
 	struct module		*owner;
 	char			*name;
 	sa_family_t		family;
+	struct hlist_head	*hashtable;
 	int 			(*seq_show) (struct seq_file *m, void *v);
 	struct file_operations	*seq_fops;
 };
 
 struct udp_iter_state {
 	sa_family_t		family;
+	struct hlist_head	*hashtable;
 	int			bucket;
 	struct seq_operations	seq_ops;
 };
diff --git a/include/net/udplite.h b/include/net/udplite.h
new file mode 100644
index 0000000..1473b3e
--- /dev/null
+++ b/include/net/udplite.h
@@ -0,0 +1,149 @@
+/*
+ *	Definitions for the UDP-Lite (RFC 3828) code.
+ */
+#ifndef _UDPLITE_H
+#define _UDPLITE_H
+
+/* UDP-Lite socket options */
+#define UDPLITE_SEND_CSCOV   10 /* sender partial coverage (as sent)      */
+#define UDPLITE_RECV_CSCOV   11 /* receiver partial coverage (threshold ) */
+
+extern struct proto 		udplite_prot;
+extern struct hlist_head 	udplite_hash[UDP_HTABLE_SIZE];
+
+/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */
+DECLARE_SNMP_STAT(struct udp_mib, udplite_statistics);
+
+/*
+ *	Checksum computation is all in software, hence simpler getfrag.
+ */
+static __inline__ int udplite_getfrag(void *from, char *to, int  offset,
+				      int len, int odd, struct sk_buff *skb)
+{
+	return memcpy_fromiovecend(to, (struct iovec *) from, offset, len);
+}
+
+/* Designate sk as UDP-Lite socket */
+static inline int udplite_sk_init(struct sock *sk)
+{
+	udp_sk(sk)->pcflag = UDPLITE_BIT;
+	return 0;
+}
+
+/*
+ * 	Checksumming routines
+ */
+static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
+{
+	u16 cscov;
+
+        /* In UDPv4 a zero checksum means that the transmitter generated no
+         * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
+         * with a zero checksum field are illegal.                            */
+	if (uh->check == 0) {
+		LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: zeroed checksum field\n");
+		return 1;
+	}
+
+        UDP_SKB_CB(skb)->partial_cov = 0;
+	cscov = ntohs(uh->len);
+
+	if (cscov == 0)		 /* Indicates that full coverage is required. */
+		cscov = skb->len;
+	else if (cscov < 8  || cscov > skb->len) {
+		/*
+		 * Coverage length violates RFC 3828: log and discard silently.
+		 */
+		LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: bad csum coverage %d/%d\n",
+			       cscov, skb->len);
+		return 1;
+
+	} else if (cscov < skb->len)
+        	UDP_SKB_CB(skb)->partial_cov = 1;
+
+        UDP_SKB_CB(skb)->cscov = cscov;
+
+	/*
+	 * There is no known NIC manufacturer supporting UDP-Lite yet,
+	 * hence ip_summed is always (re-)set to CHECKSUM_NONE.
+	 */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 0;
+}
+
+static __inline__ int udplite4_csum_init(struct sk_buff *skb, struct udphdr *uh)
+{
+	int rc = udplite_checksum_init(skb, uh);
+
+	if (!rc)
+		skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
+					       skb->nh.iph->daddr,
+					       skb->len, IPPROTO_UDPLITE, 0);
+	return rc;
+}
+
+static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh)
+{
+	int rc = udplite_checksum_init(skb, uh);
+
+	if (!rc)
+		skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+					     &skb->nh.ipv6h->daddr,
+					     skb->len, IPPROTO_UDPLITE, 0);
+	return rc;
+}
+
+static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
+{
+	int cscov = up->len;
+
+	/*
+	 * Sender has set `partial coverage' option on UDP-Lite socket
+	 */
+	if (up->pcflag & UDPLITE_SEND_CC)    {
+		if (up->pcslen < up->len) {
+		/* up->pcslen == 0 means that full coverage is required,
+		 * partial coverage only if  0 < up->pcslen < up->len */
+			if (0 < up->pcslen) {
+			       cscov = up->pcslen;
+			}
+			uh->len = htons(up->pcslen);
+		}
+	/*
+	 * NOTE: Causes for the error case  `up->pcslen > up->len':
+	 *        (i)  Application error (will not be penalized).
+	 *       (ii)  Payload too big for send buffer: data is split
+	 *             into several packets, each with its own header.
+	 *             In this case (e.g. last segment), coverage may
+	 *             exceed packet length.
+	 *       Since packets with coverage length > packet length are
+	 *       illegal, we fall back to the defaults here.
+	 */
+	}
+	return cscov;
+}
+
+static inline u32 udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
+{
+	u32 csum = 0;
+	int off, len, cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh);
+
+	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
+
+	skb_queue_walk(&sk->sk_write_queue, skb) {
+		off = skb->h.raw - skb->data;
+		len = skb->len - off;
+
+		csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
+
+		if ((cscov -= len) <= 0)
+			break;
+	}
+	return csum;
+}
+
+extern void	udplite4_register(void);
+extern int 	udplite_get_port(struct sock *sk, unsigned short snum,
+			int (*scmp)(const struct sock *, const struct sock *));
+#endif	/* _UDPLITE_H */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 81c91e8..3878a88 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -468,6 +468,7 @@
 	switch(fl->proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
 	case IPPROTO_SCTP:
 		port = fl->fl_ip_sport;
 		break;
@@ -493,6 +494,7 @@
 	switch(fl->proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
 	case IPPROTO_SCTP:
 		port = fl->fl_ip_dport;
 		break;