Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
1) Setting link attributes can modify the size of the attributes that
would be reported on a subsequent getlink netlink operation,
therefore min_ifinfo_dump_size needs to be adjusted. From Stefan
Gula.
2) Resegmentation of TSO frames while trimming can violate invariants
expected by callers, namely that the number of segments can only stay
the same or decrease, never increase. If MSS changes, however, we
can trim data but then end up with more segments. Fix this by only
segmenting to the MSS already recorded in the SKB. That's the
simplest fix for now and if we want to get more fancy in the future
that's a more involved change.
This probably explains some retransmit counter inaccuracies.
From Neal Cardwell.
3) Fix too-many-wakeups in POLL with AF_UNIX sockets, from Eric Dumazet.
4) Fix CAIF crashes wrt. namespace handling. From Eric Dumazet and
Eric W. Biederman.
5) TCP port selection fixes from Flavio Leitner.
6) More socket memory cgroup build fixes in certain randonfig
situations. From Glauber Costa.
7) Fix TCP memory sysctl regression reported by Ingo Molnar, also from
Glauber Costa.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
af_unix: fix EPOLLET regression for stream sockets
tcp: fix tcp_trim_head() to adjust segment count with skb MSS
net/tcp: Fix tcp memory limits initialization when !CONFIG_SYSCTL
net caif: Register properly as a pernet subsystem.
netns: Fail conspicously if someone uses net_generic at an inappropriate time.
net: explicitly add jump_label.h header to sock.h
net: RTNETLINK adjusting values of min_ifinfo_dump_size
ipv6: Fix ip_gre lockless xmits.
xen-netfront: correct MAX_TX_TARGET calculation.
netns: fix net_alloc_generic()
tcp: bind() optimize port allocation
tcp: bind() fix autoselection to share ports
l2tp: l2tp_ip - fix possible oops on packet receive
iwlwifi: fix PCI-E transport "inta" race
mac80211: set bss_conf.idle when vif is connected
mac80211: update oper_channel on ibss join
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
index 752493f..65d1f05 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c
@@ -972,11 +972,11 @@
}
#endif
- spin_unlock_irqrestore(&trans->shrd->lock, flags);
-
/* saved interrupt in inta variable now we can reset trans_pcie->inta */
trans_pcie->inta = 0;
+ spin_unlock_irqrestore(&trans->shrd->lock, flags);
+
/* Now service all interrupt bits discovered above. */
if (inta & CSR_INT_BIT_HW_ERR) {
IWL_ERR(trans, "Hardware error detected. Restarting.\n");
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index fa67905..698b905 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -68,7 +68,7 @@
#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
-#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
struct netfront_stats {
u64 rx_packets;
diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h
index 3419bf5..d55f434 100644
--- a/include/net/netns/generic.h
+++ b/include/net/netns/generic.h
@@ -41,6 +41,7 @@
ptr = ng->ptr[id - 1];
rcu_read_unlock();
+ BUG_ON(!ptr);
return ptr;
}
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 4c69ac16..91c1c8b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -55,6 +55,7 @@
#include <linux/uaccess.h>
#include <linux/memcontrol.h>
#include <linux/res_counter.h>
+#include <linux/jump_label.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0118ea9..d49db01 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -311,6 +311,8 @@
#define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
+extern void tcp_init_mem(struct net *net);
+
extern void tcp_v4_err(struct sk_buff *skb, u32);
extern void tcp_shutdown (struct sock *sk, int how);
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 673728a..82c5706 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -59,8 +59,6 @@
{
struct caif_net *caifn;
caifn = net_generic(net, caif_net_id);
- if (!caifn)
- return NULL;
return caifn->cfg;
}
EXPORT_SYMBOL(get_cfcnfg);
@@ -69,8 +67,6 @@
{
struct caif_net *caifn;
caifn = net_generic(net, caif_net_id);
- if (!caifn)
- return NULL;
return &caifn->caifdevs;
}
@@ -99,8 +95,6 @@
struct caif_device_entry *caifd;
caifdevs = caif_device_list(dev_net(dev));
- if (!caifdevs)
- return NULL;
caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
if (!caifd)
@@ -120,8 +114,6 @@
struct caif_device_entry_list *caifdevs =
caif_device_list(dev_net(dev));
struct caif_device_entry *caifd;
- if (!caifdevs)
- return NULL;
list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
if (caifd->netdev == dev)
@@ -321,8 +313,6 @@
struct caif_device_entry_list *caifdevs;
caifdevs = caif_device_list(dev_net(dev));
- if (!cfg || !caifdevs)
- return;
caifd = caif_device_alloc(dev);
if (!caifd)
return;
@@ -374,8 +364,6 @@
cfg = get_cfcnfg(dev_net(dev));
caifdevs = caif_device_list(dev_net(dev));
- if (!cfg || !caifdevs)
- return 0;
caifd = caif_get(dev);
if (caifd == NULL && dev->type != ARPHRD_CAIF)
@@ -507,9 +495,6 @@
static int caif_init_net(struct net *net)
{
struct caif_net *caifn = net_generic(net, caif_net_id);
- if (WARN_ON(!caifn))
- return -EINVAL;
-
INIT_LIST_HEAD(&caifn->caifdevs.list);
mutex_init(&caifn->caifdevs.lock);
@@ -527,9 +512,6 @@
caif_device_list(net);
struct cfcnfg *cfg = get_cfcnfg(net);
- if (!cfg || !caifdevs)
- return;
-
rtnl_lock();
mutex_lock(&caifdevs->lock);
@@ -569,7 +551,7 @@
{
int result;
- result = register_pernet_device(&caif_net_ops);
+ result = register_pernet_subsys(&caif_net_ops);
if (result)
return result;
@@ -582,7 +564,7 @@
static void __exit caif_device_exit(void)
{
- unregister_pernet_device(&caif_net_ops);
+ unregister_pernet_subsys(&caif_net_ops);
unregister_netdevice_notifier(&caif_device_notifier);
dev_remove_pack(&caif_packet_type);
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 598aafb..ba9cfd4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -309,7 +309,6 @@
int err;
struct cfctrl_link_param param;
struct cfcnfg *cfg = get_cfcnfg(net);
- caif_assert(cfg != NULL);
rcu_read_lock();
err = caif_connect_req_to_link_param(cfg, conn_req, ¶m);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index aefcd7a..0e950fd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -30,6 +30,20 @@
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
+static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
+
+static struct net_generic *net_alloc_generic(void)
+{
+ struct net_generic *ng;
+ size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ ng = kzalloc(generic_size, GFP_KERNEL);
+ if (ng)
+ ng->len = max_gen_ptrs;
+
+ return ng;
+}
+
static int net_assign_generic(struct net *net, int id, void *data)
{
struct net_generic *ng, *old_ng;
@@ -43,8 +57,7 @@
if (old_ng->len >= id)
goto assign;
- ng = kzalloc(sizeof(struct net_generic) +
- id * sizeof(void *), GFP_KERNEL);
+ ng = net_alloc_generic();
if (ng == NULL)
return -ENOMEM;
@@ -59,7 +72,6 @@
* the old copy for kfree after a grace period.
*/
- ng->len = id;
memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
rcu_assign_pointer(net->gen, ng);
@@ -161,18 +173,6 @@
goto out;
}
-static struct net_generic *net_alloc_generic(void)
-{
- struct net_generic *ng;
- size_t generic_size = sizeof(struct net_generic) +
- INITIAL_NET_GEN_PTRS * sizeof(void *);
-
- ng = kzalloc(generic_size, GFP_KERNEL);
- if (ng)
- ng->len = INITIAL_NET_GEN_PTRS;
-
- return ng;
-}
#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
@@ -483,6 +483,7 @@
}
return error;
}
+ max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f16444b..65aebd4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1509,6 +1509,9 @@
if (send_addr_notify)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
+ min_ifinfo_dump_size);
+
return err;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2e4e244..19d66ce 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -123,11 +123,14 @@
smallest_size = tb->num_owners;
smallest_rover = rover;
if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
- spin_unlock(&head->lock);
snum = smallest_rover;
- goto have_snum;
+ goto tb_found;
}
}
+ if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+ snum = rover;
+ goto tb_found;
+ }
goto next;
}
break;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2b53a1f..6b3ca5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -422,6 +422,10 @@
if (register_netdevice(dev) < 0)
goto failed_free;
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
dev_hold(dev);
ipgre_tunnel_link(ign, nt);
return nt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4aa7e9d..4cb9cd2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -814,6 +814,7 @@
net->ipv4.sysctl_rt_cache_rebuild_count = 4;
+ tcp_init_mem(net);
limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9bcdec3..06373b4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3216,6 +3216,16 @@
}
__setup("thash_entries=", set_thash_entries);
+void tcp_init_mem(struct net *net)
+{
+ /* Set per-socket limits to no more than 1/128 the pressure threshold */
+ unsigned long limit = nr_free_buffer_pages() / 8;
+ limit = max(limit, 128UL);
+ net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
+ net->ipv4.sysctl_tcp_mem[1] = limit;
+ net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+}
+
void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
@@ -3276,9 +3286,9 @@
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- /* Set per-socket limits to no more than 1/128 the pressure threshold */
- limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
- << (PAGE_SHIFT - 7);
+ tcp_init_mem(&init_net);
+ limit = nr_free_buffer_pages() / 8;
+ limit = max(limit, 128UL);
max_share = min(4UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8de27..4ff3b6d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1141,11 +1141,9 @@
sk_mem_uncharge(sk, len);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- /* Any change of skb->len requires recalculation of tso
- * factor and mss.
- */
+ /* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1)
- tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
+ tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
return 0;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index d21e7eb..55670ec 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -393,11 +393,6 @@
{
int rc;
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
- goto drop;
-
- nf_reset(skb);
-
/* Charge it to the socket, dropping if the queue is full. */
rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0)
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index b3d76b7..a464396 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -106,6 +106,7 @@
sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
+ local->oper_channel = chan;
channel_type = ifibss->channel_type;
if (channel_type > NL80211_CHAN_HT20 &&
!cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type))
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e47768c..01a21c2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1314,6 +1314,7 @@
continue;
}
/* count everything else */
+ sdata->vif.bss_conf.idle = false;
count++;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aad8fb6..85d3bb7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1918,7 +1918,7 @@
struct sk_buff *skb;
unix_state_lock(sk);
- skb = skb_dequeue(&sk->sk_receive_queue);
+ skb = skb_peek(&sk->sk_receive_queue);
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
if (copied >= target)
@@ -1958,11 +1958,8 @@
if (check_creds) {
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != siocb->scm->pid) ||
- (UNIXCB(skb).cred != siocb->scm->cred)) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ (UNIXCB(skb).cred != siocb->scm->cred))
break;
- }
} else {
/* Copy credentials */
scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
@@ -1977,8 +1974,6 @@
chunk = min_t(unsigned int, skb->len, size);
if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
if (copied == 0)
copied = -EFAULT;
break;
@@ -1993,13 +1988,10 @@
if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb);
- /* put the skb back if we didn't use it up.. */
- if (skb->len) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ if (skb->len)
break;
- }
+ skb_unlink(skb, &sk->sk_receive_queue);
consume_skb(skb);
if (siocb->scm->fp)
@@ -2010,9 +2002,6 @@
if (UNIXCB(skb).fp)
siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
- /* put message back and return */
- skb_queue_head(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
break;
}
} while (size);