Merge android-4.9.182 (cb0eff4) into msm-4.9
* refs/heads/tmp-cb0eff4:
Linux 4.9.182
tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
tcp: add tcp_min_snd_mss sysctl
tcp: tcp_fragment() should apply sane memory limits
tcp: limit payload size of sacked skbs
tcp: reduce tcp_fastretrans_alert() verbosity
efi/libstub: remove duplicate nokaslr
BACKPORT: Add support for BPF_FUNC_probe_read_str
UPSTREAM: binder: check for overflow when alloc for security context
BACKPORT: binder: fix race between munmap() and direct reclaim
Change-Id: Ia27ee55b392dc481fa27ea6d689022a1047a3ed3
Signed-off-by: jianzhou <jianzhou@codeaurora.org>
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index c88a0a8..12d9a68 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -230,6 +230,14 @@
Path MTU discovery (MTU probing). If MTU probing is enabled,
this is the initial MSS used by the connection.
+tcp_min_snd_mss - INTEGER
+ TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+ as described in RFC 1122 and RFC 6691.
+ If this ADVMSS option is smaller than tcp_min_snd_mss,
+ it is silently capped to tcp_min_snd_mss.
+
+ Default : 48 (at least 8 bytes of payload per segment)
+
tcp_congestion_control - STRING
Set the congestion control algorithm to be used for new
connections. The algorithm "reno" is always available, but
diff --git a/Makefile b/Makefile
index ec95482..45b38f4 100755
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 181
+SUBLEVEL = 182
EXTRAVERSION =
NAME = Roaring Lionus
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 6107c03..edb108a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3227,6 +3227,7 @@
if (target_node && target_node->txn_security_ctx) {
u32 secid;
+ size_t added_size;
security_task_getsecid(proc->tsk, &secid);
ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
@@ -3236,7 +3237,15 @@
return_error_line = __LINE__;
goto err_get_secctx_failed;
}
- extra_buffers_size += ALIGN(secctx_sz, sizeof(u64));
+ added_size = ALIGN(secctx_sz, sizeof(u64));
+ extra_buffers_size += added_size;
+ if (extra_buffers_size < added_size) {
+ /* integer overflow of extra_buffers_size */
+ return_error = BR_FAILED_REPLY;
+ return_error_param = EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_extra_size;
+ }
}
trace_binder_transaction(reply, t, target_node);
@@ -3585,6 +3594,7 @@
t->buffer->transaction = NULL;
binder_alloc_free_buf(&target_proc->alloc, t->buffer);
err_binder_alloc_buf_failed:
+err_bad_extra_size:
if (secctx)
security_release_secctx(secctx, secctx_sz);
err_get_secctx_failed:
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 34454b0..ce006af 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -891,14 +891,13 @@
index = page - alloc->pages;
page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+
+ mm = alloc->vma_vm_mm;
+ if (!mmget_not_zero(mm))
+ goto err_mmget;
+ if (!down_write_trylock(&mm->mmap_sem))
+ goto err_down_write_mmap_sem_failed;
vma = alloc->vma;
- if (vma) {
- if (!mmget_not_zero(alloc->vma_vm_mm))
- goto err_mmget;
- mm = alloc->vma_vm_mm;
- if (!down_write_trylock(&mm->mmap_sem))
- goto err_down_write_mmap_sem_failed;
- }
list_lru_isolate(lru, item);
spin_unlock(lock);
@@ -909,10 +908,9 @@
zap_page_range(vma, page_addr, PAGE_SIZE, NULL);
trace_binder_unmap_user_end(alloc, index);
-
- up_write(&mm->mmap_sem);
- mmput(mm);
}
+ up_write(&mm->mmap_sem);
+ mmput(mm);
trace_binder_unmap_kernel_start(alloc, index);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b8ea15a..4fd7dd9 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -434,4 +434,7 @@
tp->saved_syn = NULL;
}
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+ int shiftlen);
+
#endif /* _LINUX_TCP_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7adf438..bf619a6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -94,6 +94,7 @@
#endif
int sysctl_tcp_mtu_probing;
int sysctl_tcp_base_mss;
+ int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 96c2b19..6b7f988 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -53,6 +53,8 @@
#define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
/*
* Never offer a window over 32767 without using window scaling. Some
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 3442a26..56e3460 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -282,6 +282,7 @@
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
+ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
__LINUX_MIB_MAX
};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7cc06f2..c7aab15 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -81,6 +81,35 @@
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size, const void *, unsafe_ptr)
+{
+ int ret;
+
+ /*
+ * The strncpy_from_unsafe() call will likely not fill the entire
+ * buffer, but that's okay in this circumstance as we're probing
+ * arbitrary memory anyway similar to bpf_probe_read() and might
+ * as well probe the stack. Thus, memory is explicitly cleared
+ * only in error case, so that improper users ignoring return
+ * code altogether don't copy garbage; otherwise length of string
+ * is returned that can be used for bpf_perf_event_output() et al.
+ */
+ ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_proto = {
+ .func = bpf_probe_read_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_RAW_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg3_type = ARG_ANYTHING,
+};
+
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
u32, size)
{
@@ -434,6 +463,8 @@
return &bpf_map_delete_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
+ case BPF_FUNC_probe_read_str:
+ return &bpf_probe_read_str_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ec48d8ea..8b22139 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -306,6 +306,7 @@
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 76da06e..50f4945 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -35,6 +35,8 @@
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
@@ -891,6 +893,15 @@
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_min_snd_mss",
+ .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_min_snd_mss_min,
+ .extra2 = &tcp_min_snd_mss_max,
+ },
+ {
.procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a0e1fc1..ac398b72 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3359,6 +3359,7 @@
unsigned long limit;
unsigned int i;
+ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
FIELD_SIZEOF(struct sk_buff, cb));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6c42ffc..435450f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1321,7 +1321,7 @@
TCP_SKB_CB(skb)->seq += shifted;
tcp_skb_pcount_add(prev, pcount);
- BUG_ON(tcp_skb_pcount(skb) < pcount);
+ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1388,6 +1388,21 @@
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+ int pcount, int shiftlen)
+{
+ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+ * to make sure not storing more than 65535 * 8 bytes per skb,
+ * even if current MSS is bigger.
+ */
+ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+ return 0;
+ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+ return 0;
+ return skb_shift(to, from, shiftlen);
+}
+
/* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb.
*/
@@ -1399,6 +1414,7 @@
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *prev;
int mss;
+ int next_pcount;
int pcount = 0;
int len;
int in_sack;
@@ -1496,7 +1512,7 @@
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback;
- if (!skb_shift(prev, skb, len))
+ if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback;
if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
goto out;
@@ -1515,11 +1531,11 @@
goto out;
len = skb->len;
- if (skb_shift(prev, skb, len)) {
- pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ next_pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+ pcount += next_pcount;
+ tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
}
-
out:
state->fack_count += pcount;
return prev;
@@ -2838,9 +2854,9 @@
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
+ if (!tp->sacked_out && tp->fackets_out)
tp->fackets_out = 0;
/* Now state machine starts.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0116d9e..d6a9526 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2470,6 +2470,7 @@
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8d31f60..c1fd321 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1185,6 +1185,11 @@
if (nsize < 0)
nsize = 0;
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+ return -ENOMEM;
+ }
+
if (skb_unclone(skb, gfp))
return -ENOMEM;
@@ -1355,8 +1360,7 @@
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < 48)
- mss_now = 48;
+ mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
return mss_now;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 93fbace..d962922 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -172,6 +172,7 @@
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tp->tcp_header_len);
+ mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}