Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
slab: fix DEBUG_SLAB warning
slab: shrink sizeof(struct kmem_cache)
slab: fix DEBUG_SLAB build
SLUB: Fix missing <linux/stacktrace.h> include
slub: reduce overhead of slub_debug
slub: Add method to verify memory is not freed
slub: Enable backtrace for create/delete points
slab allocators: Provide generic description of alignment defines
slab, slub, slob: Unify alignment definition
slob/lockdep: Fix gfp flags passed to lockdep
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ad4dd1c..573c809 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -134,6 +134,26 @@
#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT)
/*
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+ * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ */
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
* Common kmalloc functions provided by all allocators
*/
void * __must_check __krealloc(const void *, size_t, gfp_t);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 83203ae..d00e0ba 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -18,53 +18,25 @@
#include <trace/events/kmem.h>
/*
- * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * ARCH_KMALLOC_MINALIGN allows that.
- * Note that increasing this value may disable some debug features.
- */
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-/*
- * Enforce a minimum alignment for all caches.
- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- * some debug features.
- */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
-/*
* struct kmem_cache
*
* manages a cache.
*/
struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
- struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
+/* 1) Cache tunables. Protected by cache_chain_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int buffer_size;
u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
+/* 2) touched by every alloc & free from the backend */
unsigned int flags; /* constant flags */
unsigned int num; /* # of objs per slab */
-/* 4) cache_grow/shrink */
+/* 3) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
@@ -80,11 +52,11 @@
/* constructor func */
void (*ctor)(void *obj);
-/* 5) cache creation/removal */
+/* 4) cache creation/removal */
const char *name;
struct list_head next;
-/* 6) statistics */
+/* 5) statistics */
#ifdef CONFIG_DEBUG_SLAB
unsigned long num_active;
unsigned long num_allocations;
@@ -111,16 +83,18 @@
int obj_size;
#endif /* CONFIG_DEBUG_SLAB */
+/* 6) per-cpu/per-node data, touched during every alloc/free */
/*
- * We put nodelists[] at the end of kmem_cache, because we want to size
- * this array to nr_node_ids slots instead of MAX_NUMNODES
+ * We put array[] at the end of kmem_cache, because we want to size
+ * this array to nr_cpu_ids slots instead of NR_CPUS
* (see kmem_cache_init())
- * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
- * is statically defined, so we reserve the max number of nodes.
+ * We still use [NR_CPUS] and not [1] or [0] because cache_cache
+ * is statically defined, so we reserve the max number of cpus.
*/
- struct kmem_list3 *nodelists[MAX_NUMNODES];
+ struct kmem_list3 **nodelists;
+ struct array_cache *array[NR_CPUS];
/*
- * Do not add fields after nodelists[]
+ * Do not add fields after array[]
*/
};
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 4382db0..0ec00b3 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -1,16 +1,6 @@
#ifndef __LINUX_SLOB_DEF_H
#define __LINUX_SLOB_DEF_H
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
-#endif
-
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c8668d1..4b35c06 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,16 +113,6 @@
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
/*
* Maximum kmalloc object size handled by SLUB. Larger object allocations
* are passed through to the page allocator. The page allocator "fastpath"
@@ -228,6 +218,19 @@
return ret;
}
+/**
+ * Calling this on allocated memory will check that the memory
+ * is expected to be in use, and print warnings if not.
+ */
+#ifdef CONFIG_SLUB_DEBUG
+extern bool verify_mem_not_deleted(const void *x);
+#else
+static inline bool verify_mem_not_deleted(const void *x)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_TRACING
extern void *
kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
diff --git a/mm/slab.c b/mm/slab.c
index d96e223..1e523ed 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -574,7 +574,9 @@
{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
/* internal cache of cache description objs */
+static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
static struct kmem_cache cache_cache = {
+ .nodelists = cache_cache_nodelists,
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
@@ -1492,11 +1494,10 @@
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
/*
- * struct kmem_cache size depends on nr_node_ids, which
- * can be less than MAX_NUMNODES.
+ * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
*/
- cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
- nr_node_ids * sizeof(struct kmem_list3 *);
+ cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+ nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
cache_cache.obj_size = cache_cache.buffer_size;
#endif
@@ -2308,6 +2309,7 @@
if (!cachep)
goto oops;
+ cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
#if DEBUG
cachep->obj_size = size;
@@ -3153,12 +3155,11 @@
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
cachep->ctor(objp);
-#if ARCH_SLAB_MINALIGN
- if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+ if (ARCH_SLAB_MINALIGN &&
+ ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
- objp, ARCH_SLAB_MINALIGN);
+ objp, (int)ARCH_SLAB_MINALIGN);
}
-#endif
return objp;
}
#else
diff --git a/mm/slob.c b/mm/slob.c
index 46e0aee..0ae8818 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -482,6 +482,8 @@
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
void *ret;
+ gfp &= gfp_allowed_mask;
+
lockdep_trace_alloc(gfp);
if (size < PAGE_SIZE - align) {
@@ -608,6 +610,10 @@
{
void *b;
+ flags &= gfp_allowed_mask;
+
+ lockdep_trace_alloc(flags);
+
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
diff --git a/mm/slub.c b/mm/slub.c
index 35f351f..ba83f3f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -27,6 +27,7 @@
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
+#include <linux/stacktrace.h>
#include <trace/events/kmem.h>
@@ -191,8 +192,12 @@
/*
* Tracking user of a slab.
*/
+#define TRACK_ADDRS_COUNT 16
struct track {
unsigned long addr; /* Called from address */
+#ifdef CONFIG_STACKTRACE
+ unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
+#endif
int cpu; /* Was running on cpu */
int pid; /* Pid context */
unsigned long when; /* When did the operation occur */
@@ -420,6 +425,24 @@
struct track *p = get_track(s, object, alloc);
if (addr) {
+#ifdef CONFIG_STACKTRACE
+ struct stack_trace trace;
+ int i;
+
+ trace.nr_entries = 0;
+ trace.max_entries = TRACK_ADDRS_COUNT;
+ trace.entries = p->addrs;
+ trace.skip = 3;
+ save_stack_trace(&trace);
+
+ /* See rant in lockdep.c */
+ if (trace.nr_entries != 0 &&
+ trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+ trace.nr_entries--;
+
+ for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
+ p->addrs[i] = 0;
+#endif
p->addr = addr;
p->cpu = smp_processor_id();
p->pid = current->pid;
@@ -444,6 +467,16 @@
printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
+#ifdef CONFIG_STACKTRACE
+ {
+ int i;
+ for (i = 0; i < TRACK_ADDRS_COUNT; i++)
+ if (t->addrs[i])
+ printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
+ else
+ break;
+ }
+#endif
}
static void print_tracking(struct kmem_cache *s, void *object)
@@ -557,10 +590,10 @@
memset(p + s->objsize, val, s->inuse - s->objsize);
}
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
{
while (bytes) {
- if (*start != (u8)value)
+ if (*start != value)
return start;
start++;
bytes--;
@@ -568,6 +601,38 @@
return NULL;
}
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+ u64 value64;
+ unsigned int words, prefix;
+
+ if (bytes <= 16)
+ return check_bytes8(start, value, bytes);
+
+ value64 = value | value << 8 | value << 16 | value << 24;
+ value64 = value64 | value64 << 32;
+ prefix = 8 - ((unsigned long)start) % 8;
+
+ if (prefix) {
+ u8 *r = check_bytes8(start, value, prefix);
+ if (r)
+ return r;
+ start += prefix;
+ bytes -= prefix;
+ }
+
+ words = bytes / 8;
+
+ while (words) {
+ if (*(u64 *)start != value64)
+ return check_bytes8(start, value, 8);
+ start += 8;
+ words--;
+ }
+
+ return check_bytes8(start, value, bytes % 8);
+}
+
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
void *from, void *to)
{
@@ -2928,6 +2993,42 @@
}
EXPORT_SYMBOL(ksize);
+#ifdef CONFIG_SLUB_DEBUG
+bool verify_mem_not_deleted(const void *x)
+{
+ struct page *page;
+ void *object = (void *)x;
+ unsigned long flags;
+ bool rv;
+
+ if (unlikely(ZERO_OR_NULL_PTR(x)))
+ return false;
+
+ local_irq_save(flags);
+
+ page = virt_to_head_page(x);
+ if (unlikely(!PageSlab(page))) {
+ /* maybe it was from stack? */
+ rv = true;
+ goto out_unlock;
+ }
+
+ slab_lock(page);
+ if (on_freelist(page->slab, page, object)) {
+ object_err(page->slab, page, object, "Object is on free-list");
+ rv = false;
+ } else {
+ rv = true;
+ }
+ slab_unlock(page);
+
+out_unlock:
+ local_irq_restore(flags);
+ return rv;
+}
+EXPORT_SYMBOL(verify_mem_not_deleted);
+#endif
+
void kfree(const void *x)
{
struct page *page;