blob: d7d8ffd0c3061336406105d38b2203c8b7cf6650 [file] [log] [blame]
Christoph Lameter039363f2012-07-06 15:25:10 -05001/*
2 * Slab allocator functions that are independent of the allocator strategy
3 *
4 * (C) 2012 Christoph Lameter <cl@linux.com>
5 */
6#include <linux/slab.h>
7
8#include <linux/mm.h>
9#include <linux/poison.h>
10#include <linux/interrupt.h>
11#include <linux/memory.h>
12#include <linux/compiler.h>
13#include <linux/module.h>
Christoph Lameter20cea962012-07-06 15:25:13 -050014#include <linux/cpu.h>
15#include <linux/uaccess.h>
Glauber Costab7454ad2012-10-19 18:20:25 +040016#include <linux/seq_file.h>
17#include <linux/proc_fs.h>
Christoph Lameter039363f2012-07-06 15:25:10 -050018#include <asm/cacheflush.h>
19#include <asm/tlbflush.h>
20#include <asm/page.h>
Glauber Costa2633d7a2012-12-18 14:22:34 -080021#include <linux/memcontrol.h>
Andrey Ryabinin928cec92014-08-06 16:04:44 -070022
23#define CREATE_TRACE_POINTS
Christoph Lameterf1b6eb62013-09-04 16:35:34 +000024#include <trace/events/kmem.h>
Christoph Lameter039363f2012-07-06 15:25:10 -050025
Christoph Lameter97d06602012-07-06 15:25:11 -050026#include "slab.h"
27
28enum slab_state slab_state;
Christoph Lameter18004c52012-07-06 15:25:12 -050029LIST_HEAD(slab_caches);
30DEFINE_MUTEX(slab_mutex);
Christoph Lameter9b030cb2012-09-05 00:20:33 +000031struct kmem_cache *kmem_cache;
Christoph Lameter97d06602012-07-06 15:25:11 -050032
Joonsoo Kim07f361b2014-10-09 15:26:00 -070033/*
34 * Determine the size of a slab object
35 */
36unsigned int kmem_cache_size(struct kmem_cache *s)
37{
38 return s->object_size;
39}
40EXPORT_SYMBOL(kmem_cache_size);
41
Shuah Khan77be4b12012-08-16 00:09:46 -070042#ifdef CONFIG_DEBUG_VM
Vladimir Davydov794b1242014-04-07 15:39:26 -070043static int kmem_cache_sanity_check(const char *name, size_t size)
Shuah Khan77be4b12012-08-16 00:09:46 -070044{
45 struct kmem_cache *s = NULL;
46
47 if (!name || in_interrupt() || size < sizeof(void *) ||
48 size > KMALLOC_MAX_SIZE) {
49 pr_err("kmem_cache_create(%s) integrity check failed\n", name);
50 return -EINVAL;
51 }
52
53 list_for_each_entry(s, &slab_caches, list) {
54 char tmp;
55 int res;
56
57 /*
58 * This happens when the module gets unloaded and doesn't
59 * destroy its slab cache and no-one else reuses the vmalloc
60 * area of the module. Print a warning.
61 */
62 res = probe_kernel_address(s->name, tmp);
63 if (res) {
64 pr_err("Slab cache with size %d has lost its name\n",
65 s->object_size);
66 continue;
67 }
68
Mikulas Patocka69461742014-03-04 17:13:47 -050069#if !defined(CONFIG_SLUB)
Vladimir Davydov794b1242014-04-07 15:39:26 -070070 if (!strcmp(s->name, name)) {
Shuah Khan77be4b12012-08-16 00:09:46 -070071 pr_err("%s (%s): Cache name already exists.\n",
72 __func__, name);
73 dump_stack();
74 s = NULL;
75 return -EINVAL;
76 }
Christoph Lameter3e374912013-09-21 21:56:34 +000077#endif
Shuah Khan77be4b12012-08-16 00:09:46 -070078 }
79
80 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
81 return 0;
82}
83#else
/* Without CONFIG_DEBUG_VM nothing is validated: every request is accepted. */
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
88#endif
89
Glauber Costa55007d82012-12-18 14:22:38 -080090#ifdef CONFIG_MEMCG_KMEM
91int memcg_update_all_caches(int num_memcgs)
92{
93 struct kmem_cache *s;
94 int ret = 0;
95 mutex_lock(&slab_mutex);
96
97 list_for_each_entry(s, &slab_caches, list) {
98 if (!is_root_cache(s))
99 continue;
100
101 ret = memcg_update_cache_size(s, num_memcgs);
102 /*
103 * See comment in memcontrol.c, memcg_update_cache_size:
104 * Instead of freeing the memory, we'll just leave the caches
105 * up to this point in an updated state.
106 */
107 if (ret)
108 goto out;
109 }
110
111 memcg_update_array_size(num_memcgs);
112out:
113 mutex_unlock(&slab_mutex);
114 return ret;
115}
116#endif
117
Christoph Lameter039363f2012-07-06 15:25:10 -0500118/*
Christoph Lameter45906852012-11-28 16:23:16 +0000119 * Figure out what the alignment of the objects will be given a set of
120 * flags, a user specified alignment and the size of the objects.
121 */
122unsigned long calculate_alignment(unsigned long flags,
123 unsigned long align, unsigned long size)
124{
125 /*
126 * If the user wants hardware cache aligned objects then follow that
127 * suggestion if the object is sufficiently large.
128 *
129 * The hardware cache alignment cannot override the specified
130 * alignment though. If that is greater then use it.
131 */
132 if (flags & SLAB_HWCACHE_ALIGN) {
133 unsigned long ralign = cache_line_size();
134 while (size <= ralign / 2)
135 ralign /= 2;
136 align = max(align, ralign);
137 }
138
139 if (align < ARCH_SLAB_MINALIGN)
140 align = ARCH_SLAB_MINALIGN;
141
142 return ALIGN(align, sizeof(void *));
143}
144
Vladimir Davydov794b1242014-04-07 15:39:26 -0700145static struct kmem_cache *
146do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
147 unsigned long flags, void (*ctor)(void *),
148 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
149{
150 struct kmem_cache *s;
151 int err;
152
153 err = -ENOMEM;
154 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
155 if (!s)
156 goto out;
157
158 s->name = name;
159 s->object_size = object_size;
160 s->size = size;
161 s->align = align;
162 s->ctor = ctor;
163
164 err = memcg_alloc_cache_params(memcg, s, root_cache);
165 if (err)
166 goto out_free_cache;
167
168 err = __kmem_cache_create(s, flags);
169 if (err)
170 goto out_free_cache;
171
172 s->refcount = 1;
173 list_add(&s->list, &slab_caches);
Vladimir Davydov794b1242014-04-07 15:39:26 -0700174out:
175 if (err)
176 return ERR_PTR(err);
177 return s;
178
179out_free_cache:
180 memcg_free_cache_params(s);
181 kfree(s);
182 goto out;
183}
Christoph Lameter45906852012-11-28 16:23:16 +0000184
185/*
Christoph Lameter039363f2012-07-06 15:25:10 -0500186 * kmem_cache_create - Create a cache.
187 * @name: A string which is used in /proc/slabinfo to identify this cache.
188 * @size: The size of objects to be created in this cache.
189 * @align: The required alignment for the objects.
190 * @flags: SLAB flags
191 * @ctor: A constructor for the objects.
192 *
193 * Returns a ptr to the cache on success, NULL on failure.
194 * Cannot be called within a interrupt, but can be interrupted.
195 * The @ctor is run when new pages are allocated by the cache.
196 *
197 * The flags are
198 *
199 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
200 * to catch references to uninitialised memory.
201 *
202 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
203 * for buffer overruns.
204 *
205 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
206 * cacheline. This can be beneficial if you're counting cycles as closely
207 * as davem.
208 */
Glauber Costa2633d7a2012-12-18 14:22:34 -0800209struct kmem_cache *
Vladimir Davydov794b1242014-04-07 15:39:26 -0700210kmem_cache_create(const char *name, size_t size, size_t align,
211 unsigned long flags, void (*ctor)(void *))
Christoph Lameter039363f2012-07-06 15:25:10 -0500212{
Vladimir Davydov794b1242014-04-07 15:39:26 -0700213 struct kmem_cache *s;
214 char *cache_name;
Vladimir Davydov3965fc32014-01-23 15:52:55 -0800215 int err;
Christoph Lameter039363f2012-07-06 15:25:10 -0500216
Pekka Enbergb9205362012-08-16 10:12:18 +0300217 get_online_cpus();
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700218 get_online_mems();
219
Pekka Enbergb9205362012-08-16 10:12:18 +0300220 mutex_lock(&slab_mutex);
Christoph Lameter686d5502012-09-05 00:20:33 +0000221
Vladimir Davydov794b1242014-04-07 15:39:26 -0700222 err = kmem_cache_sanity_check(name, size);
Andrew Morton3aa24f52014-10-09 15:25:58 -0700223 if (err) {
224 s = NULL; /* suppress uninit var warning */
Vladimir Davydov3965fc32014-01-23 15:52:55 -0800225 goto out_unlock;
Andrew Morton3aa24f52014-10-09 15:25:58 -0700226 }
Christoph Lameter686d5502012-09-05 00:20:33 +0000227
Glauber Costad8843922012-10-17 15:36:51 +0400228 /*
229 * Some allocators will constraint the set of valid flags to a subset
230 * of all flags. We expect them to define CACHE_CREATE_MASK in this
231 * case, and we'll just provide them with a sanitized version of the
232 * passed flags.
233 */
234 flags &= CACHE_CREATE_MASK;
Christoph Lameter686d5502012-09-05 00:20:33 +0000235
Vladimir Davydov794b1242014-04-07 15:39:26 -0700236 s = __kmem_cache_alias(name, size, align, flags, ctor);
237 if (s)
Vladimir Davydov3965fc32014-01-23 15:52:55 -0800238 goto out_unlock;
Glauber Costa2633d7a2012-12-18 14:22:34 -0800239
Vladimir Davydov794b1242014-04-07 15:39:26 -0700240 cache_name = kstrdup(name, GFP_KERNEL);
241 if (!cache_name) {
242 err = -ENOMEM;
243 goto out_unlock;
244 }
Glauber Costa2633d7a2012-12-18 14:22:34 -0800245
Vladimir Davydov794b1242014-04-07 15:39:26 -0700246 s = do_kmem_cache_create(cache_name, size, size,
247 calculate_alignment(flags, align, size),
248 flags, ctor, NULL, NULL);
249 if (IS_ERR(s)) {
250 err = PTR_ERR(s);
251 kfree(cache_name);
252 }
Vladimir Davydov3965fc32014-01-23 15:52:55 -0800253
254out_unlock:
Christoph Lameter20cea962012-07-06 15:25:13 -0500255 mutex_unlock(&slab_mutex);
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700256
257 put_online_mems();
Christoph Lameter20cea962012-07-06 15:25:13 -0500258 put_online_cpus();
259
Dave Jonesba3253c72014-01-29 14:05:48 -0800260 if (err) {
Christoph Lameter686d5502012-09-05 00:20:33 +0000261 if (flags & SLAB_PANIC)
262 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
263 name, err);
264 else {
265 printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
266 name, err);
267 dump_stack();
268 }
Christoph Lameter686d5502012-09-05 00:20:33 +0000269 return NULL;
270 }
Christoph Lameter039363f2012-07-06 15:25:10 -0500271 return s;
Glauber Costa2633d7a2012-12-18 14:22:34 -0800272}
Christoph Lameter039363f2012-07-06 15:25:10 -0500273EXPORT_SYMBOL(kmem_cache_create);
Christoph Lameter97d06602012-07-06 15:25:11 -0500274
Vladimir Davydov794b1242014-04-07 15:39:26 -0700275#ifdef CONFIG_MEMCG_KMEM
276/*
Vladimir Davydov776ed0f2014-06-04 16:10:02 -0700277 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
Vladimir Davydov794b1242014-04-07 15:39:26 -0700278 * @memcg: The memory cgroup the new cache is for.
279 * @root_cache: The parent of the new cache.
Vladimir Davydov073ee1c2014-06-04 16:08:23 -0700280 * @memcg_name: The name of the memory cgroup (used for naming the new cache).
Vladimir Davydov794b1242014-04-07 15:39:26 -0700281 *
282 * This function attempts to create a kmem cache that will serve allocation
283 * requests going from @memcg to @root_cache. The new cache inherits properties
284 * from its parent.
285 */
Vladimir Davydov776ed0f2014-06-04 16:10:02 -0700286struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
Vladimir Davydov073ee1c2014-06-04 16:08:23 -0700287 struct kmem_cache *root_cache,
288 const char *memcg_name)
Vladimir Davydov794b1242014-04-07 15:39:26 -0700289{
Vladimir Davydovbd673142014-06-04 16:07:40 -0700290 struct kmem_cache *s = NULL;
Vladimir Davydov794b1242014-04-07 15:39:26 -0700291 char *cache_name;
292
293 get_online_cpus();
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700294 get_online_mems();
295
Vladimir Davydov794b1242014-04-07 15:39:26 -0700296 mutex_lock(&slab_mutex);
297
Vladimir Davydov073ee1c2014-06-04 16:08:23 -0700298 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
299 memcg_cache_id(memcg), memcg_name);
Vladimir Davydov794b1242014-04-07 15:39:26 -0700300 if (!cache_name)
301 goto out_unlock;
302
303 s = do_kmem_cache_create(cache_name, root_cache->object_size,
304 root_cache->size, root_cache->align,
305 root_cache->flags, root_cache->ctor,
306 memcg, root_cache);
Vladimir Davydovbd673142014-06-04 16:07:40 -0700307 if (IS_ERR(s)) {
Vladimir Davydov794b1242014-04-07 15:39:26 -0700308 kfree(cache_name);
Vladimir Davydovbd673142014-06-04 16:07:40 -0700309 s = NULL;
310 }
Vladimir Davydov794b1242014-04-07 15:39:26 -0700311
312out_unlock:
313 mutex_unlock(&slab_mutex);
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700314
315 put_online_mems();
Vladimir Davydov794b1242014-04-07 15:39:26 -0700316 put_online_cpus();
Vladimir Davydovbd673142014-06-04 16:07:40 -0700317
318 return s;
Vladimir Davydov794b1242014-04-07 15:39:26 -0700319}
Vladimir Davydovb8529902014-04-07 15:39:28 -0700320
Vladimir Davydov776ed0f2014-06-04 16:10:02 -0700321static int memcg_cleanup_cache_params(struct kmem_cache *s)
Vladimir Davydovb8529902014-04-07 15:39:28 -0700322{
323 int rc;
324
325 if (!s->memcg_params ||
326 !s->memcg_params->is_root_cache)
327 return 0;
328
329 mutex_unlock(&slab_mutex);
Vladimir Davydov776ed0f2014-06-04 16:10:02 -0700330 rc = __memcg_cleanup_cache_params(s);
Vladimir Davydovb8529902014-04-07 15:39:28 -0700331 mutex_lock(&slab_mutex);
332
333 return rc;
334}
335#else
/* Without CONFIG_MEMCG_KMEM there is nothing to clean up; reports success. */
static int memcg_cleanup_cache_params(struct kmem_cache *s)
{
	return 0;
}
Vladimir Davydov794b1242014-04-07 15:39:26 -0700340#endif /* CONFIG_MEMCG_KMEM */
341
Christoph Lameter41a21282014-05-06 12:50:08 -0700342void slab_kmem_cache_release(struct kmem_cache *s)
343{
344 kfree(s->name);
345 kmem_cache_free(kmem_cache, s);
346}
347
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000348void kmem_cache_destroy(struct kmem_cache *s)
349{
350 get_online_cpus();
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700351 get_online_mems();
352
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000353 mutex_lock(&slab_mutex);
Vladimir Davydovb8529902014-04-07 15:39:28 -0700354
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000355 s->refcount--;
Vladimir Davydovb8529902014-04-07 15:39:28 -0700356 if (s->refcount)
357 goto out_unlock;
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000358
Vladimir Davydov776ed0f2014-06-04 16:10:02 -0700359 if (memcg_cleanup_cache_params(s) != 0)
Vladimir Davydovb8529902014-04-07 15:39:28 -0700360 goto out_unlock;
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000361
Vladimir Davydovb8529902014-04-07 15:39:28 -0700362 if (__kmem_cache_shutdown(s) != 0) {
Vladimir Davydovb8529902014-04-07 15:39:28 -0700363 printk(KERN_ERR "kmem_cache_destroy %s: "
364 "Slab cache still has objects\n", s->name);
365 dump_stack();
366 goto out_unlock;
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000367 }
Vladimir Davydovb8529902014-04-07 15:39:28 -0700368
Vladimir Davydov0bd62b12014-06-04 16:10:03 -0700369 list_del(&s->list);
370
Vladimir Davydovb8529902014-04-07 15:39:28 -0700371 mutex_unlock(&slab_mutex);
372 if (s->flags & SLAB_DESTROY_BY_RCU)
373 rcu_barrier();
374
375 memcg_free_cache_params(s);
Christoph Lameter41a21282014-05-06 12:50:08 -0700376#ifdef SLAB_SUPPORTS_SYSFS
377 sysfs_slab_remove(s);
378#else
379 slab_kmem_cache_release(s);
380#endif
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700381 goto out;
Vladimir Davydovb8529902014-04-07 15:39:28 -0700382
383out_unlock:
384 mutex_unlock(&slab_mutex);
Vladimir Davydov03afc0e2014-06-04 16:07:20 -0700385out:
386 put_online_mems();
Christoph Lameter945cf2b2012-09-04 23:18:33 +0000387 put_online_cpus();
388}
389EXPORT_SYMBOL(kmem_cache_destroy);
390
/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 *
 * The call to __kmem_cache_shrink() is bracketed by get_online_cpus() /
 * get_online_mems() and their put counterparts.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int status;

	get_online_cpus();
	get_online_mems();

	status = __kmem_cache_shrink(cachep);

	put_online_mems();
	put_online_cpus();

	return status;
}
EXPORT_SYMBOL(kmem_cache_shrink);
410
Christoph Lameter97d06602012-07-06 15:25:11 -0500411int slab_is_available(void)
412{
413 return slab_state >= UP;
414}
Glauber Costab7454ad2012-10-19 18:20:25 +0400415
Christoph Lameter45530c42012-11-28 16:23:07 +0000416#ifndef CONFIG_SLOB
417/* Create a cache during boot when no slab services are available yet */
418void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
419 unsigned long flags)
420{
421 int err;
422
423 s->name = name;
424 s->size = s->object_size = size;
Christoph Lameter45906852012-11-28 16:23:16 +0000425 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
Christoph Lameter45530c42012-11-28 16:23:07 +0000426 err = __kmem_cache_create(s, flags);
427
428 if (err)
Christoph Lameter31ba7342013-01-10 19:00:53 +0000429 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
Christoph Lameter45530c42012-11-28 16:23:07 +0000430 name, size, err);
431
432 s->refcount = -1; /* Exempt from merging for now */
433}
434
435struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
436 unsigned long flags)
437{
438 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
439
440 if (!s)
441 panic("Out of memory when creating slab %s\n", name);
442
443 create_boot_cache(s, name, size, flags);
444 list_add(&s->list, &slab_caches);
445 s->refcount = 1;
446 return s;
447}
448
Christoph Lameter9425c582013-01-10 19:12:17 +0000449struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
450EXPORT_SYMBOL(kmalloc_caches);
451
452#ifdef CONFIG_ZONE_DMA
453struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
454EXPORT_SYMBOL(kmalloc_dma_caches);
455#endif
456
Christoph Lameterf97d5f62013-01-10 19:12:17 +0000457/*
Christoph Lameter2c59dd62013-01-10 19:14:19 +0000458 * Conversion table for small slabs sizes / 8 to the index in the
459 * kmalloc array. This is necessary for slabs < 192 since we have non power
460 * of two cache sizes there. The size of larger slabs can be determined using
461 * fls.
462 */
463static s8 size_index[24] = {
464 3, /* 8 */
465 4, /* 16 */
466 5, /* 24 */
467 5, /* 32 */
468 6, /* 40 */
469 6, /* 48 */
470 6, /* 56 */
471 6, /* 64 */
472 1, /* 72 */
473 1, /* 80 */
474 1, /* 88 */
475 1, /* 96 */
476 7, /* 104 */
477 7, /* 112 */
478 7, /* 120 */
479 7, /* 128 */
480 2, /* 136 */
481 2, /* 144 */
482 2, /* 152 */
483 2, /* 160 */
484 2, /* 168 */
485 2, /* 176 */
486 2, /* 184 */
487 2 /* 192 */
488};
489
/* Slot in size_index[] for a request of @bytes bytes: (bytes - 1) / 8. */
static inline int size_index_elem(size_t bytes)
{
	size_t last_byte = bytes - 1;

	return last_byte >> 3;
}
494
495/*
496 * Find the kmem_cache structure that serves a given size of
497 * allocation
498 */
499struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
500{
501 int index;
502
Joonsoo Kim9de1bc82013-08-02 11:02:42 +0900503 if (unlikely(size > KMALLOC_MAX_SIZE)) {
Sasha Levin907985f2013-06-10 15:18:00 -0400504 WARN_ON_ONCE(!(flags & __GFP_NOWARN));
Christoph Lameter6286ae92013-05-03 15:43:18 +0000505 return NULL;
Sasha Levin907985f2013-06-10 15:18:00 -0400506 }
Christoph Lameter6286ae92013-05-03 15:43:18 +0000507
Christoph Lameter2c59dd62013-01-10 19:14:19 +0000508 if (size <= 192) {
509 if (!size)
510 return ZERO_SIZE_PTR;
511
512 index = size_index[size_index_elem(size)];
513 } else
514 index = fls(size - 1);
515
516#ifdef CONFIG_ZONE_DMA
Joonsoo Kimb1e05412013-02-04 23:46:46 +0900517 if (unlikely((flags & GFP_DMA)))
Christoph Lameter2c59dd62013-01-10 19:14:19 +0000518 return kmalloc_dma_caches[index];
519
520#endif
521 return kmalloc_caches[index];
522}
523
524/*
Christoph Lameterf97d5f62013-01-10 19:12:17 +0000525 * Create the kmalloc array. Some of the regular kmalloc arrays
526 * may already have been created because they were needed to
527 * enable allocations for slab creation.
528 */
529void __init create_kmalloc_caches(unsigned long flags)
530{
531 int i;
532
Christoph Lameter2c59dd62013-01-10 19:14:19 +0000533 /*
534 * Patch up the size_index table if we have strange large alignment
535 * requirements for the kmalloc array. This is only the case for
536 * MIPS it seems. The standard arches will not generate any code here.
537 *
538 * Largest permitted alignment is 256 bytes due to the way we
539 * handle the index determination for the smaller caches.
540 *
541 * Make sure that nothing crazy happens if someone starts tinkering
542 * around with ARCH_KMALLOC_MINALIGN
543 */
544 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
545 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
546
547 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
548 int elem = size_index_elem(i);
549
550 if (elem >= ARRAY_SIZE(size_index))
551 break;
552 size_index[elem] = KMALLOC_SHIFT_LOW;
553 }
554
555 if (KMALLOC_MIN_SIZE >= 64) {
556 /*
557 * The 96 byte size cache is not used if the alignment
558 * is 64 byte.
559 */
560 for (i = 64 + 8; i <= 96; i += 8)
561 size_index[size_index_elem(i)] = 7;
562
563 }
564
565 if (KMALLOC_MIN_SIZE >= 128) {
566 /*
567 * The 192 byte sized cache is not used if the alignment
568 * is 128 byte. Redirect kmalloc to use the 256 byte cache
569 * instead.
570 */
571 for (i = 128 + 8; i <= 192; i += 8)
572 size_index[size_index_elem(i)] = 8;
573 }
Christoph Lameter8a965b32013-05-03 18:04:18 +0000574 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
575 if (!kmalloc_caches[i]) {
Christoph Lameterf97d5f62013-01-10 19:12:17 +0000576 kmalloc_caches[i] = create_kmalloc_cache(NULL,
577 1 << i, flags);
Christoph Lameter8a965b32013-05-03 18:04:18 +0000578 }
Chris Mason956e46e2013-05-08 15:56:28 -0400579
580 /*
581 * Caches that are not of the two-to-the-power-of size.
582 * These have to be created immediately after the
583 * earlier power of two caches
584 */
585 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
586 kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
587
588 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
589 kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
Christoph Lameter8a965b32013-05-03 18:04:18 +0000590 }
591
Christoph Lameterf97d5f62013-01-10 19:12:17 +0000592 /* Kmalloc array is now usable */
593 slab_state = UP;
594
595 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
596 struct kmem_cache *s = kmalloc_caches[i];
597 char *n;
598
599 if (s) {
600 n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
601
602 BUG_ON(!n);
603 s->name = n;
604 }
605 }
606
607#ifdef CONFIG_ZONE_DMA
608 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
609 struct kmem_cache *s = kmalloc_caches[i];
610
611 if (s) {
612 int size = kmalloc_size(i);
613 char *n = kasprintf(GFP_NOWAIT,
614 "dma-kmalloc-%d", size);
615
616 BUG_ON(!n);
617 kmalloc_dma_caches[i] = create_kmalloc_cache(n,
618 size, SLAB_CACHE_DMA | flags);
619 }
620 }
621#endif
622}
Christoph Lameter45530c42012-11-28 16:23:07 +0000623#endif /* !CONFIG_SLOB */
624
Vladimir Davydovcea371f2014-06-04 16:07:04 -0700625/*
626 * To avoid unnecessary overhead, we pass through large allocation requests
627 * directly to the page allocator. We use __GFP_COMP, because we will need to
628 * know the allocation order to free the pages properly in kfree.
629 */
Vladimir Davydov52383432014-06-04 16:06:39 -0700630void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
631{
632 void *ret;
633 struct page *page;
634
635 flags |= __GFP_COMP;
636 page = alloc_kmem_pages(flags, order);
637 ret = page ? page_address(page) : NULL;
638 kmemleak_alloc(ret, size, 1, flags);
639 return ret;
640}
641EXPORT_SYMBOL(kmalloc_order);
642
Christoph Lameterf1b6eb62013-09-04 16:35:34 +0000643#ifdef CONFIG_TRACING
644void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
645{
646 void *ret = kmalloc_order(size, flags, order);
647 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
648 return ret;
649}
650EXPORT_SYMBOL(kmalloc_order_trace);
651#endif
Christoph Lameter45530c42012-11-28 16:23:07 +0000652
Glauber Costab7454ad2012-10-19 18:20:25 +0400653#ifdef CONFIG_SLABINFO
Wanpeng Lie9b4db22013-07-04 08:33:24 +0800654
/*
 * Mode of the slabinfo proc entry: readable by the owner, and writable by
 * the owner as well when CONFIG_SLAB is defined.
 */
#if defined(CONFIG_SLAB)
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif
660
/*
 * Emit the header of the slabinfo listing into @m: a version line followed
 * by one line naming the columns.  CONFIG_DEBUG_SLAB adds "(statistics)" to
 * the version line and the globalstat/cpustat column groups.
 *
 * NOTE(review): the column padding inside the string literals is reproduced
 * exactly as found; runs of blanks (e.g. after "# name") may have been
 * collapsed in this copy of the file - confirm against the authoritative
 * source before relying on the alignment.
 */
void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif

	/* Column legend, written as a single output line. */
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
683
Glauber Costab7454ad2012-10-19 18:20:25 +0400684static void *s_start(struct seq_file *m, loff_t *pos)
685{
686 loff_t n = *pos;
687
688 mutex_lock(&slab_mutex);
689 if (!n)
690 print_slabinfo_header(m);
691
692 return seq_list_start(&slab_caches, *pos);
693}
694
Wanpeng Li276a2432013-07-08 08:08:28 +0800695void *slab_next(struct seq_file *m, void *p, loff_t *pos)
Glauber Costab7454ad2012-10-19 18:20:25 +0400696{
697 return seq_list_next(p, &slab_caches, pos);
698}
699
Wanpeng Li276a2432013-07-08 08:08:28 +0800700void slab_stop(struct seq_file *m, void *p)
Glauber Costab7454ad2012-10-19 18:20:25 +0400701{
702 mutex_unlock(&slab_mutex);
703}
704
Glauber Costa749c5412012-12-18 14:23:01 -0800705static void
706memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
Glauber Costab7454ad2012-10-19 18:20:25 +0400707{
Glauber Costa749c5412012-12-18 14:23:01 -0800708 struct kmem_cache *c;
709 struct slabinfo sinfo;
710 int i;
711
712 if (!is_root_cache(s))
713 return;
714
715 for_each_memcg_cache_index(i) {
Qiang Huang2ade4de2013-11-12 15:08:23 -0800716 c = cache_from_memcg_idx(s, i);
Glauber Costa749c5412012-12-18 14:23:01 -0800717 if (!c)
718 continue;
719
720 memset(&sinfo, 0, sizeof(sinfo));
721 get_slabinfo(c, &sinfo);
722
723 info->active_slabs += sinfo.active_slabs;
724 info->num_slabs += sinfo.num_slabs;
725 info->shared_avail += sinfo.shared_avail;
726 info->active_objs += sinfo.active_objs;
727 info->num_objs += sinfo.num_objs;
728 }
729}
730
731int cache_show(struct kmem_cache *s, struct seq_file *m)
732{
Glauber Costa0d7561c2012-10-19 18:20:27 +0400733 struct slabinfo sinfo;
734
735 memset(&sinfo, 0, sizeof(sinfo));
736 get_slabinfo(s, &sinfo);
737
Glauber Costa749c5412012-12-18 14:23:01 -0800738 memcg_accumulate_slabinfo(s, &sinfo);
739
Glauber Costa0d7561c2012-10-19 18:20:27 +0400740 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
Glauber Costa749c5412012-12-18 14:23:01 -0800741 cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
Glauber Costa0d7561c2012-10-19 18:20:27 +0400742 sinfo.objects_per_slab, (1 << sinfo.cache_order));
743
744 seq_printf(m, " : tunables %4u %4u %4u",
745 sinfo.limit, sinfo.batchcount, sinfo.shared);
746 seq_printf(m, " : slabdata %6lu %6lu %6lu",
747 sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
748 slabinfo_show_stats(m, s);
749 seq_putc(m, '\n');
750 return 0;
Glauber Costab7454ad2012-10-19 18:20:25 +0400751}
752
Glauber Costa749c5412012-12-18 14:23:01 -0800753static int s_show(struct seq_file *m, void *p)
754{
755 struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
756
757 if (!is_root_cache(s))
758 return 0;
759 return cache_show(s, m);
760}
761
Glauber Costab7454ad2012-10-19 18:20:25 +0400762/*
763 * slabinfo_op - iterator that generates /proc/slabinfo
764 *
765 * Output layout:
766 * cache-name
767 * num-active-objs
768 * total-objs
769 * object size
770 * num-active-slabs
771 * total-slabs
772 * num-pages-per-slab
773 * + further values on SMP and with statistics enabled
774 */
775static const struct seq_operations slabinfo_op = {
776 .start = s_start,
Wanpeng Li276a2432013-07-08 08:08:28 +0800777 .next = slab_next,
778 .stop = slab_stop,
Glauber Costab7454ad2012-10-19 18:20:25 +0400779 .show = s_show,
780};
781
782static int slabinfo_open(struct inode *inode, struct file *file)
783{
784 return seq_open(file, &slabinfo_op);
785}
786
787static const struct file_operations proc_slabinfo_operations = {
788 .open = slabinfo_open,
789 .read = seq_read,
790 .write = slabinfo_write,
791 .llseek = seq_lseek,
792 .release = seq_release,
793};
794
795static int __init slab_proc_init(void)
796{
Wanpeng Lie9b4db22013-07-04 08:33:24 +0800797 proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
798 &proc_slabinfo_operations);
Glauber Costab7454ad2012-10-19 18:20:25 +0400799 return 0;
800}
801module_init(slab_proc_init);
802#endif /* CONFIG_SLABINFO */
Andrey Ryabinin928cec92014-08-06 16:04:44 -0700803
804static __always_inline void *__do_krealloc(const void *p, size_t new_size,
805 gfp_t flags)
806{
807 void *ret;
808 size_t ks = 0;
809
810 if (p)
811 ks = ksize(p);
812
813 if (ks >= new_size)
814 return (void *)p;
815
816 ret = kmalloc_track_caller(new_size, flags);
817 if (ret && p)
818 memcpy(ret, p, ks);
819
820 return ret;
821}
822
823/**
824 * __krealloc - like krealloc() but don't free @p.
825 * @p: object to reallocate memory for.
826 * @new_size: how many bytes of memory are required.
827 * @flags: the type of memory to allocate.
828 *
829 * This function is like krealloc() except it never frees the originally
830 * allocated buffer. Use this if you don't want to free the buffer immediately
831 * like, for example, with RCU.
832 */
833void *__krealloc(const void *p, size_t new_size, gfp_t flags)
834{
835 if (unlikely(!new_size))
836 return ZERO_SIZE_PTR;
837
838 return __do_krealloc(p, new_size, flags);
839
840}
841EXPORT_SYMBOL(__krealloc);
842
843/**
844 * krealloc - reallocate memory. The contents will remain unchanged.
845 * @p: object to reallocate memory for.
846 * @new_size: how many bytes of memory are required.
847 * @flags: the type of memory to allocate.
848 *
849 * The contents of the object pointed to are preserved up to the
850 * lesser of the new and old sizes. If @p is %NULL, krealloc()
851 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
852 * %NULL pointer, the object pointed to is freed.
853 */
854void *krealloc(const void *p, size_t new_size, gfp_t flags)
855{
856 void *ret;
857
858 if (unlikely(!new_size)) {
859 kfree(p);
860 return ZERO_SIZE_PTR;
861 }
862
863 ret = __do_krealloc(p, new_size, flags);
864 if (ret && p != ret)
865 kfree(p);
866
867 return ret;
868}
869EXPORT_SYMBOL(krealloc);
870
/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;

	/* ksize() reports the full buffer, not just the requested part. */
	memset(mem, 0, ksize(mem));
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);
894
/* Tracepoint definitions exported from this file. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);