bpf: add lookup/update support for per-cpu hash and array maps

The functions bpf_map_lookup_elem(map, key, value) and
bpf_map_update_elem(map, key, value, flags) need to get/set
values from all CPUs for per-cpu hash and array maps,
so that user space can aggregate/update them as necessary.

Example of single counter aggregation in user space:
  unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
  long values[nr_cpus];
  long value = 0;
  int i;

  bpf_lookup_elem(fd, key, values);
  for (i = 0; i < nr_cpus; i++)
    value += values[i];
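
An update works the same way in the other direction, writing one value
per cpu back into the map, e.g. resetting the counter everywhere
(a sketch, assuming the same sample-style wrapper convention as above,
here bpf_update_elem(fd, key, value, flags)):

  for (i = 0; i < nr_cpus; i++)
    values[i] = 0;		/* clear the counter on every cpu */
  bpf_update_elem(fd, key, values, BPF_ANY);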

User space must provide a buffer of round_up(value_size, 8) * nr_cpus
bytes to get/set the values, since the kernel uses 'long' copies of the
per-cpu values to try to copy consistent counters atomically.
This is best-effort only, since bpf programs and user space race
to access the same memory.
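
For a value type whose size is not a multiple of 8, the per-cpu entries
are laid out at that rounded stride, so the buffer must be sized and
indexed accordingly. A minimal sketch (struct pkt_stats, value_size and
total_bytes are illustrative names, not part of this patch):

  unsigned long stride = (value_size + 7) & ~7UL; /* round_up(value_size, 8) */
  void *values = calloc(nr_cpus, stride);
  struct pkt_stats *s;

  if (values && bpf_lookup_elem(fd, key, values) == 0)
    for (i = 0; i < nr_cpus; i++) {
      s = values + i * stride;	/* entry for cpu i starts at a stride boundary */
      total_bytes += s->bytes;
    }
  free(values);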

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b9bf1d7..bd3bdf2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -130,6 +130,32 @@
 	return this_cpu_ptr(array->pptrs[index]);
 }
 
+int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	void __percpu *pptr;
+	int cpu, off = 0;
+	u32 size;
+
+	if (unlikely(index >= array->map.max_entries))
+		return -ENOENT;
+
+	/* per_cpu areas are zero-filled and bpf programs can only
+	 * access 'value_size' of them, so copying rounded areas
+	 * will not leak any kernel data
+	 */
+	size = round_up(map->value_size, 8);
+	rcu_read_lock();
+	pptr = array->pptrs[index];
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 /* Called from syscall */
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -177,6 +203,44 @@
 	return 0;
 }
 
+int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
+			    u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+	void __percpu *pptr;
+	int cpu, off = 0;
+	u32 size;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		/* unknown flags */
+		return -EINVAL;
+
+	if (unlikely(index >= array->map.max_entries))
+		/* all elements were pre-allocated, cannot insert a new one */
+		return -E2BIG;
+
+	if (unlikely(map_flags == BPF_NOEXIST))
+		/* all elements already exist */
+		return -EEXIST;
+
+	/* the user space will provide round_up(value_size, 8) bytes that
+	 * will be copied into per-cpu area. bpf programs can only access
+	 * value_size of it. During lookup the same extra bytes will be
+	 * returned or zeros which were zero-filled by percpu_alloc,
+	 * so no kernel data leaks possible
+	 */
+	size = round_up(map->value_size, 8);
+	rcu_read_lock();
+	pptr = array->pptrs[index];
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 /* Called from syscall or from eBPF program */
 static int array_map_delete_elem(struct bpf_map *map, void *key)
 {