Blame - mm/slub.c - kernel/msm-5.4

blob: 4251917c5da1c123a478ec86159aa32081a7140c [file] [log] [blame]

Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1	/*
				2	* SLUB: A slab allocator that limits cache line use instead of queuing
				3	* objects in per cpu and per node lists.
				4	*
				5	* The allocator synchronizes using per slab locks and only
				6	* uses a centralized lock to manage a pool of partial slabs.
				7	*
				8	* (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
				9	*/
				10
				11	#include <linux/mm.h>
				12	#include <linux/module.h>
				13	#include <linux/bit_spinlock.h>
				14	#include <linux/interrupt.h>
				15	#include <linux/bitops.h>
				16	#include <linux/slab.h>
				17	#include <linux/seq_file.h>
				18	#include <linux/cpu.h>
				19	#include <linux/cpuset.h>
				20	#include <linux/mempolicy.h>
				21	#include <linux/ctype.h>
				22	#include <linux/kallsyms.h>
				23
				24	/*
				25	* Lock order:
				26	* 1. slab_lock(page)
				27	* 2. slab->list_lock
				28	*
				29	* The slab_lock protects operations on the object of a particular
				30	* slab and its metadata in the page struct. If the slab lock
				31	* has been taken then no allocations nor frees can be performed
				32	* on the objects in the slab nor can the slab be added or removed
				33	* from the partial or full lists since this would mean modifying
				34	* the page_struct of the slab.
				35	*
				36	* The list_lock protects the partial and full list on each node and
				37	* the partial slab counter. If taken then no new slabs may be added or
				38	* removed from the lists nor make the number of partial slabs be modified.
				39	* (Note that the total number of slabs is an atomic value that may be
				40	* modified without taking the list lock).
				41	*
				42	* The list_lock is a centralized lock and thus we avoid taking it as
				43	* much as possible. As long as SLUB does not have to handle partial
				44	* slabs, operations can continue without any centralized lock. F.e.
				45	* allocating a long series of objects that fill up slabs does not require
				46	* the list lock.
				47	*
				48	* The lock order is sometimes inverted when we are trying to get a slab
				49	* off a list. We take the list_lock and then look for a page on the list
				50	* to use. While we do that objects in the slabs may be freed. We can
				51	* only operate on the slab if we have also taken the slab_lock. So we use
				52	* a slab_trylock() on the slab. If trylock was successful then no frees
				53	* can occur anymore and we can use the slab for allocations etc. If the
				54	* slab_trylock() does not succeed then frees are in progress in the slab and
				55	* we must stay away from it for a while since we may cause a bouncing
				56	* cacheline if we try to acquire the lock. So go onto the next slab.
				57	* If all pages are busy then we may allocate a new slab instead of reusing
				58	* a partial slab. A new slab has no one operating on it and thus there is
				59	* no danger of cacheline contention.
				60	*
				61	* Interrupts are disabled during allocation and deallocation in order to
				62	* make the slab allocator safe to use in the context of an irq. In addition
				63	* interrupts are disabled to ensure that the processor does not change
				64	* while handling per_cpu slabs, due to kernel preemption.
				65	*
				66	* SLUB assigns one slab for allocation to each processor.
				67	* Allocations only occur from these slabs called cpu slabs.
				68	*
				69	* Slabs with free elements are kept on a partial list.
				70	* There is no list for full slabs. If an object in a full slab is
				71	* freed then the slab will show up again on the partial lists.
				72	* Otherwise there is no need to track full slabs unless we have to
				73	* track full slabs for debugging purposes.
				74	*
				75	* Slabs are freed when they become empty. Teardown and setup is
				76	* minimal so we rely on the page allocators per cpu caches for
				77	* fast frees and allocs.
				78	*
				79	* Overloading of page flags that are otherwise used for LRU management.
				80	*
				81	* PageActive The slab is used as a cpu cache. Allocations
				82	* may be performed from the slab. The slab is not
				83	* on any slab list and cannot be moved onto one.
				84	*
				85	* PageError Slab requires special handling due to debug
				86	* options set. This moves slab handling out of
				87	* the fast path.
				88	*/
				89
				90	/*
				91	* Issues still to be resolved:
				92	*
				93	* - The per cpu array is updated for each new slab and is a remote
				94	* cacheline for most nodes. This could become a bouncing cacheline given
				95	* enough frequent updates. There are 16 pointers in a cacheline, so at
				96	* max 16 cpus could compete. Likely okay.
				97	*
				98	* - Support PAGE_ALLOC_DEBUG. Should be easy to do.
				99	*
				100	* - Support DEBUG_SLAB_LEAK. Trouble is we do not know where the full
				101	* slabs are in SLUB.
				102	*
				103	* - SLAB_DEBUG_INITIAL is not supported but I have never seen a use of
				104	* it.
				105	*
				106	* - Variable sizing of the per node arrays
				107	*/
				108
				109	/* Enable to test recovery from slab corruption on boot */
				110	#undef SLUB_RESILIENCY_TEST
				111
				112	#if PAGE_SHIFT <= 12
				113
				114	/*
				115	* Small page size (PAGE_SHIFT <= 12). Keep the defaults low so that we do not fragment memory
				116	*/
				117	#define DEFAULT_MAX_ORDER 1
				118	#define DEFAULT_MIN_OBJECTS 4
				119
				120	#else
				121
				122	/*
				123	* Large page machines (PAGE_SHIFT > 12) are customarily able to handle larger
				124	* page orders, so both defaults are doubled.
				125	*/
				126	#define DEFAULT_MAX_ORDER 2
				127	#define DEFAULT_MIN_OBJECTS 8
				128
				129	#endif
				130
				131	/*
				132	* Flags from the regular SLAB that SLUB does not support:
				133	*/
				134	#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
				135
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	136	/* Mininum number of partial slabs */
				137	#define MIN_PARTIAL 2
				138
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	139	#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE \| SLAB_RED_ZONE \| \
				140	SLAB_POISON \| SLAB_STORE_USER)
				141	/*
				142	* Set of flags that will prevent slab merging
				143	*/
				144	#define SLUB_NEVER_MERGE (SLAB_RED_ZONE \| SLAB_POISON \| SLAB_STORE_USER \| \
				145	SLAB_TRACE \| SLAB_DESTROY_BY_RCU)
				146
				147	#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE \| SLAB_RECLAIM_ACCOUNT \| \
				148	SLAB_CACHE_DMA)
				149
				150	#ifndef ARCH_KMALLOC_MINALIGN
Christoph Lameter	47bfdc0	2007-05-06 14:49:37 -0700	[diff] [blame]	151	#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	152	#endif
				153
				154	#ifndef ARCH_SLAB_MINALIGN
Christoph Lameter	47bfdc0	2007-05-06 14:49:37 -0700	[diff] [blame]	155	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	156	#endif
				157
				158	/* Internal SLUB flags */
				159	#define __OBJECT_POISON 0x80000000 /* Poison object */
				160
				161	static int kmem_size = sizeof(struct kmem_cache);
				162
				163	#ifdef CONFIG_SMP
				164	static struct notifier_block slab_notifier;
				165	#endif
				166
				167	static enum {
				168	DOWN, /* No slab functionality available */
				169	PARTIAL, /* kmem_cache_open() works but kmalloc does not */
				170	UP, /* Everything works */
				171	SYSFS /* Sysfs up */
				172	} slab_state = DOWN;
				173
				174	/* A list of all slab caches on the system */
				175	static DECLARE_RWSEM(slub_lock);
				176	LIST_HEAD(slab_caches);
				177
				178	#ifdef CONFIG_SYSFS
				179	static int sysfs_slab_add(struct kmem_cache *);
				180	static int sysfs_slab_alias(struct kmem_cache , const char );
				181	static void sysfs_slab_remove(struct kmem_cache *);
				182	#else
				183	static int sysfs_slab_add(struct kmem_cache *s) { return 0; }
				184	static int sysfs_slab_alias(struct kmem_cache s, const char p) { return 0; }
				185	static void sysfs_slab_remove(struct kmem_cache *s) {}
				186	#endif
				187
				188	/********************************************************************
				189	* Core slab cache functions
				190	*******************************************************************/
				191
				192	int slab_is_available(void)
				193	{
				194	return slab_state >= UP;
				195	}
				196
				197	static inline struct kmem_cache_node get_node(struct kmem_cache s, int node)
				198	{
				199	#ifdef CONFIG_NUMA
				200	return s->node[node];
				201	#else
				202	return &s->local_node;
				203	#endif
				204	}
				205
				206	/*
				207	* Object debugging
				208	*/
				209	static void print_section(char text, u8 addr, unsigned int length)
				210	{
				211	int i, offset;
				212	int newline = 1;
				213	char ascii[17];
				214
				215	ascii[16] = 0;
				216
				217	for (i = 0; i < length; i++) {
				218	if (newline) {
				219	printk(KERN_ERR "%10s 0x%p: ", text, addr + i);
				220	newline = 0;
				221	}
				222	printk(" %02x", addr[i]);
				223	offset = i % 16;
				224	ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
				225	if (offset == 15) {
				226	printk(" %s\n",ascii);
				227	newline = 1;
				228	}
				229	}
				230	if (!newline) {
				231	i %= 16;
				232	while (i < 16) {
				233	printk(" ");
				234	ascii[i] = ' ';
				235	i++;
				236	}
				237	printk(" %s\n", ascii);
				238	}
				239	}
				240
				241	/*
				242	* Slow version of get and set free pointer.
				243	*
				244	* This requires touching the cache lines of kmem_cache.
				245	* The offset can also be obtained from the page. In that
				246	* case it is in the cacheline that we already need to touch.
				247	*/
				248	static void get_freepointer(struct kmem_cache s, void *object)
				249	{
				250	return (void *)(object + s->offset);
				251	}
				252
				253	static void set_freepointer(struct kmem_cache s, void object, void *fp)
				254	{
				255	(void *)(object + s->offset) = fp;
				256	}
				257
				258	/*
				259	* Tracking user of a slab.
				260	*/
				261	struct track {
				262	void addr; / Called from address */
				263	int cpu; /* Was running on cpu */
				264	int pid; /* Pid context */
				265	unsigned long when; /* When did the operation occur */
				266	};
				267
				268	enum track_item { TRACK_ALLOC, TRACK_FREE };
				269
				270	static struct track get_track(struct kmem_cache s, void *object,
				271	enum track_item alloc)
				272	{
				273	struct track *p;
				274
				275	if (s->offset)
				276	p = object + s->offset + sizeof(void *);
				277	else
				278	p = object + s->inuse;
				279
				280	return p + alloc;
				281	}
				282
				283	static void set_track(struct kmem_cache s, void object,
				284	enum track_item alloc, void *addr)
				285	{
				286	struct track *p;
				287
				288	if (s->offset)
				289	p = object + s->offset + sizeof(void *);
				290	else
				291	p = object + s->inuse;
				292
				293	p += alloc;
				294	if (addr) {
				295	p->addr = addr;
				296	p->cpu = smp_processor_id();
				297	p->pid = current ? current->pid : -1;
				298	p->when = jiffies;
				299	} else
				300	memset(p, 0, sizeof(struct track));
				301	}
				302
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	303	static void init_tracking(struct kmem_cache s, void object)
				304	{
				305	if (s->flags & SLAB_STORE_USER) {
				306	set_track(s, object, TRACK_FREE, NULL);
				307	set_track(s, object, TRACK_ALLOC, NULL);
				308	}
				309	}
				310
				311	static void print_track(const char s, struct track t)
				312	{
				313	if (!t->addr)
				314	return;
				315
				316	printk(KERN_ERR "%s: ", s);
				317	__print_symbol("%s", (unsigned long)t->addr);
				318	printk(" jiffies_ago=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
				319	}
				320
				321	static void print_trailer(struct kmem_cache s, u8 p)
				322	{
				323	unsigned int off; /* Offset of last byte */
				324
				325	if (s->flags & SLAB_RED_ZONE)
				326	print_section("Redzone", p + s->objsize,
				327	s->inuse - s->objsize);
				328
				329	printk(KERN_ERR "FreePointer 0x%p -> 0x%p\n",
				330	p + s->offset,
				331	get_freepointer(s, p));
				332
				333	if (s->offset)
				334	off = s->offset + sizeof(void *);
				335	else
				336	off = s->inuse;
				337
				338	if (s->flags & SLAB_STORE_USER) {
				339	print_track("Last alloc", get_track(s, p, TRACK_ALLOC));
				340	print_track("Last free ", get_track(s, p, TRACK_FREE));
				341	off += 2 * sizeof(struct track);
				342	}
				343
				344	if (off != s->size)
				345	/* Beginning of the filler is the free pointer */
				346	print_section("Filler", p + off, s->size - off);
				347	}
				348
				349	static void object_err(struct kmem_cache s, struct page page,
				350	u8 object, char reason)
				351	{
				352	u8 *addr = page_address(page);
				353
				354	printk(KERN_ERR "*** SLUB %s: %s@0x%p slab 0x%p\n",
				355	s->name, reason, object, page);
				356	printk(KERN_ERR " offset=%tu flags=0x%04lx inuse=%u freelist=0x%p\n",
				357	object - addr, page->flags, page->inuse, page->freelist);
				358	if (object > addr + 16)
				359	print_section("Bytes b4", object - 16, 16);
				360	print_section("Object", object, min(s->objsize, 128));
				361	print_trailer(s, object);
				362	dump_stack();
				363	}
				364
				365	static void slab_err(struct kmem_cache s, struct page page, char *reason, ...)
				366	{
				367	va_list args;
				368	char buf[100];
				369
				370	va_start(args, reason);
				371	vsnprintf(buf, sizeof(buf), reason, args);
				372	va_end(args);
				373	printk(KERN_ERR "*** SLUB %s: %s in slab @0x%p\n", s->name, buf,
				374	page);
				375	dump_stack();
				376	}
				377
				378	static void init_object(struct kmem_cache s, void object, int active)
				379	{
				380	u8 *p = object;
				381
				382	if (s->flags & __OBJECT_POISON) {
				383	memset(p, POISON_FREE, s->objsize - 1);
				384	p[s->objsize -1] = POISON_END;
				385	}
				386
				387	if (s->flags & SLAB_RED_ZONE)
				388	memset(p + s->objsize,
				389	active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
				390	s->inuse - s->objsize);
				391	}
				392
				393	static int check_bytes(u8 *start, unsigned int value, unsigned int bytes)
				394	{
				395	while (bytes) {
				396	if (*start != (u8)value)
				397	return 0;
				398	start++;
				399	bytes--;
				400	}
				401	return 1;
				402	}
				403
				404
				405	static int check_valid_pointer(struct kmem_cache s, struct page page,
				406	void *object)
				407	{
				408	void *base;
				409
				410	if (!object)
				411	return 1;
				412
				413	base = page_address(page);
				414	if (object < base \|\| object >= base + s->objects * s->size \|\|
				415	(object - base) % s->size) {
				416	return 0;
				417	}
				418
				419	return 1;
				420	}
				421
				422	/*
				423	* Object layout:
				424	*
				425	* object address
				426	* Bytes of the object to be managed.
				427	* If the freepointer may overlay the object then the free
				428	* pointer is the first word of the object.
				429	* Poisoning uses 0x6b (POISON_FREE) and the last byte is
				430	* 0xa5 (POISON_END)
				431	*
				432	* object + s->objsize
				433	* Padding to reach word boundary. This is also used for Redzoning.
				434	* Padding is extended to word size if Redzoning is enabled
				435	* and objsize == inuse.
				436	* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
				437	* 0xcc (RED_ACTIVE) for objects in use.
				438	*
				439	* object + s->inuse
				440	* A. Free pointer (if we cannot overwrite object on free)
				441	* B. Tracking data for SLAB_STORE_USER
				442	* C. Padding to reach required alignment boundary
				443	* Padding is done using 0x5a (POISON_INUSE)
				444	*
				445	* object + s->size
				446	*
				447	* If slabcaches are merged then the objsize and inuse boundaries are to
				448	* be ignored. And therefore no slab options that rely on these boundaries
				449	* may be used with merged slabcaches.
				450	*/
				451
				452	static void restore_bytes(struct kmem_cache s, char message, u8 data,
				453	void from, void to)
				454	{
				455	printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n",
				456	s->name, message, data, from, to - 1);
				457	memset(from, data, to - from);
				458	}
				459
				460	static int check_pad_bytes(struct kmem_cache s, struct page page, u8 *p)
				461	{
				462	unsigned long off = s->inuse; /* The end of info */
				463
				464	if (s->offset)
				465	/* Freepointer is placed after the object. */
				466	off += sizeof(void *);
				467
				468	if (s->flags & SLAB_STORE_USER)
				469	/* We also have user information there */
				470	off += 2 * sizeof(struct track);
				471
				472	if (s->size == off)
				473	return 1;
				474
				475	if (check_bytes(p + off, POISON_INUSE, s->size - off))
				476	return 1;
				477
				478	object_err(s, page, p, "Object padding check fails");
				479
				480	/*
				481	* Restore padding
				482	*/
				483	restore_bytes(s, "object padding", POISON_INUSE, p + off, p + s->size);
				484	return 0;
				485	}
				486
				487	static int slab_pad_check(struct kmem_cache s, struct page page)
				488	{
				489	u8 *p;
				490	int length, remainder;
				491
				492	if (!(s->flags & SLAB_POISON))
				493	return 1;
				494
				495	p = page_address(page);
				496	length = s->objects * s->size;
				497	remainder = (PAGE_SIZE << s->order) - length;
				498	if (!remainder)
				499	return 1;
				500
				501	if (!check_bytes(p + length, POISON_INUSE, remainder)) {
				502	printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n",
				503	s->name, p);
				504	dump_stack();
				505	restore_bytes(s, "slab padding", POISON_INUSE, p + length,
				506	p + length + remainder);
				507	return 0;
				508	}
				509	return 1;
				510	}
				511
				512	static int check_object(struct kmem_cache s, struct page page,
				513	void *object, int active)
				514	{
				515	u8 *p = object;
				516	u8 *endobject = object + s->objsize;
				517
				518	if (s->flags & SLAB_RED_ZONE) {
				519	unsigned int red =
				520	active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
				521
				522	if (!check_bytes(endobject, red, s->inuse - s->objsize)) {
				523	object_err(s, page, object,
				524	active ? "Redzone Active" : "Redzone Inactive");
				525	restore_bytes(s, "redzone", red,
				526	endobject, object + s->inuse);
				527	return 0;
				528	}
				529	} else {
				530	if ((s->flags & SLAB_POISON) && s->objsize < s->inuse &&
				531	!check_bytes(endobject, POISON_INUSE,
				532	s->inuse - s->objsize)) {
				533	object_err(s, page, p, "Alignment padding check fails");
				534	/*
				535	* Fix it so that there will not be another report.
				536	*
				537	* Hmmm... We may be corrupting an object that now expects
				538	* to be longer than allowed.
				539	*/
				540	restore_bytes(s, "alignment padding", POISON_INUSE,
				541	endobject, object + s->inuse);
				542	}
				543	}
				544
				545	if (s->flags & SLAB_POISON) {
				546	if (!active && (s->flags & __OBJECT_POISON) &&
				547	(!check_bytes(p, POISON_FREE, s->objsize - 1) \|\|
				548	p[s->objsize - 1] != POISON_END)) {
				549
				550	object_err(s, page, p, "Poison check failed");
				551	restore_bytes(s, "Poison", POISON_FREE,
				552	p, p + s->objsize -1);
				553	restore_bytes(s, "Poison", POISON_END,
				554	p + s->objsize - 1, p + s->objsize);
				555	return 0;
				556	}
				557	/*
				558	* check_pad_bytes cleans up on its own.
				559	*/
				560	check_pad_bytes(s, page, p);
				561	}
				562
				563	if (!s->offset && active)
				564	/*
				565	* Object and freepointer overlap. Cannot check
				566	* freepointer while object is allocated.
				567	*/
				568	return 1;
				569
				570	/* Check free pointer validity */
				571	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
				572	object_err(s, page, p, "Freepointer corrupt");
				573	/*
				574	* No choice but to zap it and thus loose the remainder
				575	* of the free objects in this slab. May cause
				576	* another error because the object count maybe
				577	* wrong now.
				578	*/
				579	set_freepointer(s, p, NULL);
				580	return 0;
				581	}
				582	return 1;
				583	}
				584
				585	static int check_slab(struct kmem_cache s, struct page page)
				586	{
				587	VM_BUG_ON(!irqs_disabled());
				588
				589	if (!PageSlab(page)) {
				590	printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p "
				591	"flags=%lx mapping=0x%p count=%d \n",
				592	s->name, page, page->flags, page->mapping,
				593	page_count(page));
				594	return 0;
				595	}
				596	if (page->offset * sizeof(void *) != s->offset) {
				597	printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p"
				598	" flags=0x%lx mapping=0x%p count=%d\n",
				599	s->name,
				600	(unsigned long)(page->offset * sizeof(void *)),
				601	page,
				602	page->flags,
				603	page->mapping,
				604	page_count(page));
				605	dump_stack();
				606	return 0;
				607	}
				608	if (page->inuse > s->objects) {
				609	printk(KERN_ERR "SLUB: %s Inuse %u > max %u in slab "
				610	"page @0x%p flags=%lx mapping=0x%p count=%d\n",
				611	s->name, page->inuse, s->objects, page, page->flags,
				612	page->mapping, page_count(page));
				613	dump_stack();
				614	return 0;
				615	}
				616	/* Slab_pad_check fixes things up after itself */
				617	slab_pad_check(s, page);
				618	return 1;
				619	}
				620
				621	/*
				622	* Determine if a certain object on a page is on the freelist and
				623	* therefore free. Must hold the slab lock for cpu slabs to
				624	* guarantee that the chains are consistent.
				625	*/
				626	static int on_freelist(struct kmem_cache s, struct page page, void *search)
				627	{
				628	int nr = 0;
				629	void *fp = page->freelist;
				630	void *object = NULL;
				631
				632	while (fp && nr <= s->objects) {
				633	if (fp == search)
				634	return 1;
				635	if (!check_valid_pointer(s, page, fp)) {
				636	if (object) {
				637	object_err(s, page, object,
				638	"Freechain corrupt");
				639	set_freepointer(s, object, NULL);
				640	break;
				641	} else {
				642	printk(KERN_ERR "SLUB: %s slab 0x%p "
				643	"freepointer 0x%p corrupted.\n",
				644	s->name, page, fp);
				645	dump_stack();
				646	page->freelist = NULL;
				647	page->inuse = s->objects;
				648	return 0;
				649	}
				650	break;
				651	}
				652	object = fp;
				653	fp = get_freepointer(s, object);
				654	nr++;
				655	}
				656
				657	if (page->inuse != s->objects - nr) {
				658	printk(KERN_ERR "slab %s: page 0x%p wrong object count."
				659	" counter is %d but counted were %d\n",
				660	s->name, page, page->inuse,
				661	s->objects - nr);
				662	page->inuse = s->objects - nr;
				663	}
				664	return search == NULL;
				665	}
				666
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	667	/*
				668	* Tracking of fully allocated slabs for debugging
				669	*/
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	670	static void add_full(struct kmem_cache_node n, struct page page)
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	671	{
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	672	spin_lock(&n->list_lock);
				673	list_add(&page->lru, &n->full);
				674	spin_unlock(&n->list_lock);
				675	}
				676
				677	static void remove_full(struct kmem_cache s, struct page page)
				678	{
				679	struct kmem_cache_node *n;
				680
				681	if (!(s->flags & SLAB_STORE_USER))
				682	return;
				683
				684	n = get_node(s, page_to_nid(page));
				685
				686	spin_lock(&n->list_lock);
				687	list_del(&page->lru);
				688	spin_unlock(&n->list_lock);
				689	}
				690
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	691	static int alloc_object_checks(struct kmem_cache s, struct page page,
				692	void *object)
				693	{
				694	if (!check_slab(s, page))
				695	goto bad;
				696
				697	if (object && !on_freelist(s, page, object)) {
				698	printk(KERN_ERR "SLUB: %s Object 0x%p@0x%p "
				699	"already allocated.\n",
				700	s->name, object, page);
				701	goto dump;
				702	}
				703
				704	if (!check_valid_pointer(s, page, object)) {
				705	object_err(s, page, object, "Freelist Pointer check fails");
				706	goto dump;
				707	}
				708
				709	if (!object)
				710	return 1;
				711
				712	if (!check_object(s, page, object, 0))
				713	goto bad;
				714	init_object(s, object, 1);
				715
				716	if (s->flags & SLAB_TRACE) {
				717	printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n",
				718	s->name, object, page->inuse,
				719	page->freelist);
				720	dump_stack();
				721	}
				722	return 1;
				723	dump:
				724	dump_stack();
				725	bad:
				726	if (PageSlab(page)) {
				727	/*
				728	* If this is a slab page then lets do the best we can
				729	* to avoid issues in the future. Marking all objects
				730	* as used avoids touching the remainder.
				731	*/
				732	printk(KERN_ERR "@@@ SLUB: %s slab 0x%p. Marking all objects used.\n",
				733	s->name, page);
				734	page->inuse = s->objects;
				735	page->freelist = NULL;
				736	/* Fix up fields that may be corrupted */
				737	page->offset = s->offset / sizeof(void *);
				738	}
				739	return 0;
				740	}
				741
				742	static int free_object_checks(struct kmem_cache s, struct page page,
				743	void *object)
				744	{
				745	if (!check_slab(s, page))
				746	goto fail;
				747
				748	if (!check_valid_pointer(s, page, object)) {
				749	printk(KERN_ERR "SLUB: %s slab 0x%p invalid "
				750	"object pointer 0x%p\n",
				751	s->name, page, object);
				752	goto fail;
				753	}
				754
				755	if (on_freelist(s, page, object)) {
				756	printk(KERN_ERR "SLUB: %s slab 0x%p object "
				757	"0x%p already free.\n", s->name, page, object);
				758	goto fail;
				759	}
				760
				761	if (!check_object(s, page, object, 1))
				762	return 0;
				763
				764	if (unlikely(s != page->slab)) {
				765	if (!PageSlab(page))
				766	printk(KERN_ERR "slab_free %s size %d: attempt to"
				767	"free object(0x%p) outside of slab.\n",
				768	s->name, s->size, object);
				769	else
				770	if (!page->slab)
				771	printk(KERN_ERR
				772	"slab_free : no slab(NULL) for object 0x%p.\n",
				773	object);
				774	else
				775	printk(KERN_ERR "slab_free %s(%d): object at 0x%p"
				776	" belongs to slab %s(%d)\n",
				777	s->name, s->size, object,
				778	page->slab->name, page->slab->size);
				779	goto fail;
				780	}
				781	if (s->flags & SLAB_TRACE) {
				782	printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n",
				783	s->name, object, page->inuse,
				784	page->freelist);
				785	print_section("Object", object, s->objsize);
				786	dump_stack();
				787	}
				788	init_object(s, object, 0);
				789	return 1;
				790	fail:
				791	dump_stack();
				792	printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n",
				793	s->name, page, object);
				794	return 0;
				795	}
				796
				797	/*
				798	* Slab allocation and freeing
				799	*/
				800	static struct page allocate_slab(struct kmem_cache s, gfp_t flags, int node)
				801	{
				802	struct page * page;
				803	int pages = 1 << s->order;
				804
				805	if (s->order)
				806	flags \|= __GFP_COMP;
				807
				808	if (s->flags & SLAB_CACHE_DMA)
				809	flags \|= SLUB_DMA;
				810
				811	if (node == -1)
				812	page = alloc_pages(flags, s->order);
				813	else
				814	page = alloc_pages_node(node, flags, s->order);
				815
				816	if (!page)
				817	return NULL;
				818
				819	mod_zone_page_state(page_zone(page),
				820	(s->flags & SLAB_RECLAIM_ACCOUNT) ?
				821	NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
				822	pages);
				823
				824	return page;
				825	}
				826
				827	static void setup_object(struct kmem_cache s, struct page page,
				828	void *object)
				829	{
				830	if (PageError(page)) {
				831	init_object(s, object, 0);
				832	init_tracking(s, object);
				833	}
				834
				835	if (unlikely(s->ctor)) {
				836	int mode = SLAB_CTOR_CONSTRUCTOR;
				837
				838	if (!(s->flags & __GFP_WAIT))
				839	mode \|= SLAB_CTOR_ATOMIC;
				840
				841	s->ctor(object, s, mode);
				842	}
				843	}
				844
				845	static struct page new_slab(struct kmem_cache s, gfp_t flags, int node)
				846	{
				847	struct page *page;
				848	struct kmem_cache_node *n;
				849	void *start;
				850	void *end;
				851	void *last;
				852	void *p;
				853
				854	if (flags & __GFP_NO_GROW)
				855	return NULL;
				856
				857	BUG_ON(flags & ~(GFP_DMA \| GFP_LEVEL_MASK));
				858
				859	if (flags & __GFP_WAIT)
				860	local_irq_enable();
				861
				862	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
				863	if (!page)
				864	goto out;
				865
				866	n = get_node(s, page_to_nid(page));
				867	if (n)
				868	atomic_long_inc(&n->nr_slabs);
				869	page->offset = s->offset / sizeof(void *);
				870	page->slab = s;
				871	page->flags \|= 1 << PG_slab;
				872	if (s->flags & (SLAB_DEBUG_FREE \| SLAB_RED_ZONE \| SLAB_POISON \|
				873	SLAB_STORE_USER \| SLAB_TRACE))
				874	page->flags \|= 1 << PG_error;
				875
				876	start = page_address(page);
				877	end = start + s->objects * s->size;
				878
				879	if (unlikely(s->flags & SLAB_POISON))
				880	memset(start, POISON_INUSE, PAGE_SIZE << s->order);
				881
				882	last = start;
				883	for (p = start + s->size; p < end; p += s->size) {
				884	setup_object(s, page, last);
				885	set_freepointer(s, last, p);
				886	last = p;
				887	}
				888	setup_object(s, page, last);
				889	set_freepointer(s, last, NULL);
				890
				891	page->freelist = start;
				892	page->inuse = 0;
				893	out:
				894	if (flags & __GFP_WAIT)
				895	local_irq_disable();
				896	return page;
				897	}
				898
				899	static void __free_slab(struct kmem_cache s, struct page page)
				900	{
				901	int pages = 1 << s->order;
				902
				903	if (unlikely(PageError(page) \|\| s->dtor)) {
				904	void *start = page_address(page);
				905	void *end = start + (pages << PAGE_SHIFT);
				906	void *p;
				907
				908	slab_pad_check(s, page);
				909	for (p = start; p <= end - s->size; p += s->size) {
				910	if (s->dtor)
				911	s->dtor(p, s, 0);
				912	check_object(s, page, p, 0);
				913	}
				914	}
				915
				916	mod_zone_page_state(page_zone(page),
				917	(s->flags & SLAB_RECLAIM_ACCOUNT) ?
				918	NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
				919	- pages);
				920
				921	page->mapping = NULL;
				922	__free_pages(page, s->order);
				923	}
				924
				925	static void rcu_free_slab(struct rcu_head *h)
				926	{
				927	struct page *page;
				928
				929	page = container_of((struct list_head *)h, struct page, lru);
				930	__free_slab(page->slab, page);
				931	}
				932
				933	static void free_slab(struct kmem_cache s, struct page page)
				934	{
				935	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
				936	/*
				937	* RCU free overloads the RCU head over the LRU
				938	*/
				939	struct rcu_head head = (void )&page->lru;
				940
				941	call_rcu(head, rcu_free_slab);
				942	} else
				943	__free_slab(s, page);
				944	}
				945
				946	static void discard_slab(struct kmem_cache s, struct page page)
				947	{
				948	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				949
				950	atomic_long_dec(&n->nr_slabs);
				951	reset_page_mapcount(page);
				952	page->flags &= ~(1 << PG_slab \| 1 << PG_error);
				953	free_slab(s, page);
				954	}
				955
				956	/*
				957	* Per slab locking using the pagelock
				958	*/
				959	static __always_inline void slab_lock(struct page *page)
				960	{
				961	bit_spin_lock(PG_locked, &page->flags);
				962	}
				963
				964	static __always_inline void slab_unlock(struct page *page)
				965	{
				966	bit_spin_unlock(PG_locked, &page->flags);
				967	}
				968
				969	static __always_inline int slab_trylock(struct page *page)
				970	{
				971	int rc = 1;
				972
				973	rc = bit_spin_trylock(PG_locked, &page->flags);
				974	return rc;
				975	}
				976
				977	/*
				978	* Management of partially allocated slabs
				979	*/
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	980	static void add_partial_tail(struct kmem_cache_node n, struct page page)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	981	{
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	982	spin_lock(&n->list_lock);
				983	n->nr_partial++;
				984	list_add_tail(&page->lru, &n->partial);
				985	spin_unlock(&n->list_lock);
				986	}
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	987
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	988	static void add_partial(struct kmem_cache_node n, struct page page)
				989	{
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	990	spin_lock(&n->list_lock);
				991	n->nr_partial++;
				992	list_add(&page->lru, &n->partial);
				993	spin_unlock(&n->list_lock);
				994	}
				995
				996	static void remove_partial(struct kmem_cache *s,
				997	struct page *page)
				998	{
				999	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				1000
				1001	spin_lock(&n->list_lock);
				1002	list_del(&page->lru);
				1003	n->nr_partial--;
				1004	spin_unlock(&n->list_lock);
				1005	}
				1006
				1007	/*
				1008	* Lock page and remove it from the partial list
				1009	*
				1010	* Must hold list_lock
				1011	*/
				1012	static int lock_and_del_slab(struct kmem_cache_node n, struct page page)
				1013	{
				1014	if (slab_trylock(page)) {
				1015	list_del(&page->lru);
				1016	n->nr_partial--;
				1017	return 1;
				1018	}
				1019	return 0;
				1020	}
				1021
				1022	/*
				1023	* Try to get a partial slab from a specific node
				1024	*/
				1025	static struct page get_partial_node(struct kmem_cache_node n)
				1026	{
				1027	struct page *page;
				1028
				1029	/*
				1030	* Racy check. If we mistakenly see no partial slabs then we
				1031	* just allocate an empty slab. If we mistakenly try to get a
				1032	* partial slab then get_partials() will return NULL.
				1033	*/
				1034	if (!n \|\| !n->nr_partial)
				1035	return NULL;
				1036
				1037	spin_lock(&n->list_lock);
				1038	list_for_each_entry(page, &n->partial, lru)
				1039	if (lock_and_del_slab(n, page))
				1040	goto out;
				1041	page = NULL;
				1042	out:
				1043	spin_unlock(&n->list_lock);
				1044	return page;
				1045	}
				1046
				1047	/*
				1048	* Get a page from somewhere. Search in increasing NUMA
				1049	* distances.
				1050	*/
				1051	static struct page get_any_partial(struct kmem_cache s, gfp_t flags)
				1052	{
				1053	#ifdef CONFIG_NUMA
				1054	struct zonelist *zonelist;
				1055	struct zone **z;
				1056	struct page *page;
				1057
				1058	/*
				1059	* The defrag ratio allows to configure the tradeoffs between
				1060	* inter node defragmentation and node local allocations.
				1061	* A lower defrag_ratio increases the tendency to do local
				1062	* allocations instead of scanning throught the partial
				1063	* lists on other nodes.
				1064	*
				1065	* If defrag_ratio is set to 0 then kmalloc() always
				1066	* returns node local objects. If its higher then kmalloc()
				1067	* may return off node objects in order to avoid fragmentation.
				1068	*
				1069	* A higher ratio means slabs may be taken from other nodes
				1070	* thus reducing the number of partial slabs on those nodes.
				1071	*
				1072	* If /sys/slab/xx/defrag_ratio is set to 100 (which makes
				1073	* defrag_ratio = 1000) then every (well almost) allocation
				1074	* will first attempt to defrag slab caches on other nodes. This
				1075	* means scanning over all nodes to look for partial slabs which
				1076	* may be a bit expensive to do on every slab allocation.
				1077	*/
				1078	if (!s->defrag_ratio \|\| get_cycles() % 1024 > s->defrag_ratio)
				1079	return NULL;
				1080
				1081	zonelist = &NODE_DATA(slab_node(current->mempolicy))
				1082	->node_zonelists[gfp_zone(flags)];
				1083	for (z = zonelist->zones; *z; z++) {
				1084	struct kmem_cache_node *n;
				1085
				1086	n = get_node(s, zone_to_nid(*z));
				1087
				1088	if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1089	n->nr_partial > MIN_PARTIAL) {
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1090	page = get_partial_node(n);
				1091	if (page)
				1092	return page;
				1093	}
				1094	}
				1095	#endif
				1096	return NULL;
				1097	}
				1098
				1099	/*
				1100	* Get a partial page, lock it and return it.
				1101	*/
				1102	static struct page get_partial(struct kmem_cache s, gfp_t flags, int node)
				1103	{
				1104	struct page *page;
				1105	int searchnode = (node == -1) ? numa_node_id() : node;
				1106
				1107	page = get_partial_node(get_node(s, searchnode));
				1108	if (page \|\| (flags & __GFP_THISNODE))
				1109	return page;
				1110
				1111	return get_any_partial(s, flags);
				1112	}
				1113
				1114	/*
				1115	* Move a page back to the lists.
				1116	*
				1117	* Must be called with the slab lock held.
				1118	*
				1119	* On exit the slab lock will have been dropped.
				1120	*/
				1121	static void putback_slab(struct kmem_cache s, struct page page)
				1122	{
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1123	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				1124
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1125	if (page->inuse) {
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1126
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1127	if (page->freelist)
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1128	add_partial(n, page);
				1129	else if (PageError(page) && (s->flags & SLAB_STORE_USER))
				1130	add_full(n, page);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1131	slab_unlock(page);
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1132
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1133	} else {
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1134	if (n->nr_partial < MIN_PARTIAL) {
				1135	/*
				1136	* Adding an empty page to the partial slabs in order
				1137	* to avoid page allocator overhead. This page needs to
				1138	* come after all the others that are not fully empty
				1139	* in order to make sure that we do maximum
				1140	* defragmentation.
				1141	*/
				1142	add_partial_tail(n, page);
				1143	slab_unlock(page);
				1144	} else {
				1145	slab_unlock(page);
				1146	discard_slab(s, page);
				1147	}
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1148	}
				1149	}
				1150
				1151	/*
				1152	* Remove the cpu slab
				1153	*/
				1154	static void deactivate_slab(struct kmem_cache s, struct page page, int cpu)
				1155	{
				1156	s->cpu_slab[cpu] = NULL;
				1157	ClearPageActive(page);
				1158
				1159	putback_slab(s, page);
				1160	}
				1161
				1162	static void flush_slab(struct kmem_cache s, struct page page, int cpu)
				1163	{
				1164	slab_lock(page);
				1165	deactivate_slab(s, page, cpu);
				1166	}
				1167
				1168	/*
				1169	* Flush cpu slab.
				1170	* Called from IPI handler with interrupts disabled.
				1171	*/
				1172	static void __flush_cpu_slab(struct kmem_cache *s, int cpu)
				1173	{
				1174	struct page *page = s->cpu_slab[cpu];
				1175
				1176	if (likely(page))
				1177	flush_slab(s, page, cpu);
				1178	}
				1179
				1180	static void flush_cpu_slab(void *d)
				1181	{
				1182	struct kmem_cache *s = d;
				1183	int cpu = smp_processor_id();
				1184
				1185	__flush_cpu_slab(s, cpu);
				1186	}
				1187
				1188	static void flush_all(struct kmem_cache *s)
				1189	{
				1190	#ifdef CONFIG_SMP
				1191	on_each_cpu(flush_cpu_slab, s, 1, 1);
				1192	#else
				1193	unsigned long flags;
				1194
				1195	local_irq_save(flags);
				1196	flush_cpu_slab(s);
				1197	local_irq_restore(flags);
				1198	#endif
				1199	}
				1200
				1201	/*
				1202	* slab_alloc is optimized to only modify two cachelines on the fast path
				1203	* (aside from the stack):
				1204	*
				1205	* 1. The page struct
				1206	* 2. The first cacheline of the object to be allocated.
				1207	*
				1208	* The only cache lines that are read (apart from code) is the
				1209	* per cpu array in the kmem_cache struct.
				1210	*
				1211	* Fastpath is not possible if we need to get a new slab or have
				1212	* debugging enabled (which means all slabs are marked with PageError)
				1213	*/
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1214	static void slab_alloc(struct kmem_cache s,
				1215	gfp_t gfpflags, int node, void *addr)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1216	{
				1217	struct page *page;
				1218	void **object;
				1219	unsigned long flags;
				1220	int cpu;
				1221
				1222	local_irq_save(flags);
				1223	cpu = smp_processor_id();
				1224	page = s->cpu_slab[cpu];
				1225	if (!page)
				1226	goto new_slab;
				1227
				1228	slab_lock(page);
				1229	if (unlikely(node != -1 && page_to_nid(page) != node))
				1230	goto another_slab;
				1231	redo:
				1232	object = page->freelist;
				1233	if (unlikely(!object))
				1234	goto another_slab;
				1235	if (unlikely(PageError(page)))
				1236	goto debug;
				1237
				1238	have_object:
				1239	page->inuse++;
				1240	page->freelist = object[page->offset];
				1241	slab_unlock(page);
				1242	local_irq_restore(flags);
				1243	return object;
				1244
				1245	another_slab:
				1246	deactivate_slab(s, page, cpu);
				1247
				1248	new_slab:
				1249	page = get_partial(s, gfpflags, node);
				1250	if (likely(page)) {
				1251	have_slab:
				1252	s->cpu_slab[cpu] = page;
				1253	SetPageActive(page);
				1254	goto redo;
				1255	}
				1256
				1257	page = new_slab(s, gfpflags, node);
				1258	if (page) {
				1259	cpu = smp_processor_id();
				1260	if (s->cpu_slab[cpu]) {
				1261	/*
				1262	* Someone else populated the cpu_slab while we enabled
				1263	* interrupts, or we have got scheduled on another cpu.
				1264	* The page may not be on the requested node.
				1265	*/
				1266	if (node == -1 \|\|
				1267	page_to_nid(s->cpu_slab[cpu]) == node) {
				1268	/*
				1269	* Current cpuslab is acceptable and we
				1270	* want the current one since its cache hot
				1271	*/
				1272	discard_slab(s, page);
				1273	page = s->cpu_slab[cpu];
				1274	slab_lock(page);
				1275	goto redo;
				1276	}
				1277	/* Dump the current slab */
				1278	flush_slab(s, s->cpu_slab[cpu], cpu);
				1279	}
				1280	slab_lock(page);
				1281	goto have_slab;
				1282	}
				1283	local_irq_restore(flags);
				1284	return NULL;
				1285	debug:
				1286	if (!alloc_object_checks(s, page, object))
				1287	goto another_slab;
				1288	if (s->flags & SLAB_STORE_USER)
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1289	set_track(s, object, TRACK_ALLOC, addr);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1290	goto have_object;
				1291	}
				1292
				1293	void kmem_cache_alloc(struct kmem_cache s, gfp_t gfpflags)
				1294	{
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1295	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1296	}
				1297	EXPORT_SYMBOL(kmem_cache_alloc);
				1298
				1299	#ifdef CONFIG_NUMA
				1300	void kmem_cache_alloc_node(struct kmem_cache s, gfp_t gfpflags, int node)
				1301	{
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1302	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1303	}
				1304	EXPORT_SYMBOL(kmem_cache_alloc_node);
				1305	#endif
				1306
				1307	/*
				1308	* The fastpath only writes the cacheline of the page struct and the first
				1309	* cacheline of the object.
				1310	*
				1311	* No special cachelines need to be read
				1312	*/
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1313	static void slab_free(struct kmem_cache s, struct page page,
				1314	void x, void addr)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1315	{
				1316	void *prior;
				1317	void *object = (void )x;
				1318	unsigned long flags;
				1319
				1320	local_irq_save(flags);
				1321	slab_lock(page);
				1322
				1323	if (unlikely(PageError(page)))
				1324	goto debug;
				1325	checks_ok:
				1326	prior = object[page->offset] = page->freelist;
				1327	page->freelist = object;
				1328	page->inuse--;
				1329
				1330	if (unlikely(PageActive(page)))
				1331	/*
				1332	* Cpu slabs are never on partial lists and are
				1333	* never freed.
				1334	*/
				1335	goto out_unlock;
				1336
				1337	if (unlikely(!page->inuse))
				1338	goto slab_empty;
				1339
				1340	/*
				1341	* Objects left in the slab. If it
				1342	* was not on the partial list before
				1343	* then add it.
				1344	*/
				1345	if (unlikely(!prior))
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1346	add_partial(get_node(s, page_to_nid(page)), page);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1347
				1348	out_unlock:
				1349	slab_unlock(page);
				1350	local_irq_restore(flags);
				1351	return;
				1352
				1353	slab_empty:
				1354	if (prior)
				1355	/*
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	1356	* Slab on the partial list.
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1357	*/
				1358	remove_partial(s, page);
				1359
				1360	slab_unlock(page);
				1361	discard_slab(s, page);
				1362	local_irq_restore(flags);
				1363	return;
				1364
				1365	debug:
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1366	if (!free_object_checks(s, page, x))
				1367	goto out_unlock;
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	1368	if (!PageActive(page) && !page->freelist)
				1369	remove_full(s, page);
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1370	if (s->flags & SLAB_STORE_USER)
				1371	set_track(s, x, TRACK_FREE, addr);
				1372	goto checks_ok;
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1373	}
				1374
				1375	void kmem_cache_free(struct kmem_cache s, void x)
				1376	{
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1377	struct page *page;
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1378
Christoph Lameter	b49af68	2007-05-06 14:49:41 -0700	[diff] [blame]	1379	page = virt_to_head_page(x);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1380
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	1381	slab_free(s, page, x, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1382	}
				1383	EXPORT_SYMBOL(kmem_cache_free);
				1384
				1385	/* Figure out on which slab object the object resides */
				1386	static struct page get_object_page(const void x)
				1387	{
Christoph Lameter	b49af68	2007-05-06 14:49:41 -0700	[diff] [blame]	1388	struct page *page = virt_to_head_page(x);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1389
				1390	if (!PageSlab(page))
				1391	return NULL;
				1392
				1393	return page;
				1394	}
				1395
				1396	/*
				1397	* kmem_cache_open produces objects aligned at "size" and the first object
				1398	* is placed at offset 0 in the slab (We have no metainformation on the
				1399	* slab, all slabs are in essence "off slab").
				1400	*
				1401	* In order to get the desired alignment one just needs to align the
				1402	* size.
				1403	*
				1404	* Notice that the allocation order determines the sizes of the per cpu
				1405	* caches. Each processor has always one slab available for allocations.
				1406	* Increasing the allocation order reduces the number of times that slabs
				1407	* must be moved on and off the partial lists and therefore may influence
				1408	* locking overhead.
				1409	*
				1410	* The offset is used to relocate the free list link in each object. It is
				1411	* therefore possible to move the free list link behind the object. This
				1412	* is necessary for RCU to work properly and also useful for debugging.
				1413	*/
				1414
				1415	/*
				1416	* Mininum / Maximum order of slab pages. This influences locking overhead
				1417	* and slab fragmentation. A higher order reduces the number of partial slabs
				1418	* and increases the number of allocations possible without having to
				1419	* take the list_lock.
				1420	*/
				1421	static int slub_min_order;
				1422	static int slub_max_order = DEFAULT_MAX_ORDER;
				1423
				1424	/*
				1425	* Minimum number of objects per slab. This is necessary in order to
				1426	* reduce locking overhead. Similar to the queue size in SLAB.
				1427	*/
				1428	static int slub_min_objects = DEFAULT_MIN_OBJECTS;
				1429
				1430	/*
				1431	* Merge control. If this is set then no merging of slab caches will occur.
				1432	*/
				1433	static int slub_nomerge;
				1434
				1435	/*
				1436	* Debug settings:
				1437	*/
				1438	static int slub_debug;
				1439
				1440	static char *slub_debug_slabs;
				1441
				1442	/*
				1443	* Calculate the order of allocation given an slab object size.
				1444	*
				1445	* The order of allocation has significant impact on other elements
				1446	* of the system. Generally order 0 allocations should be preferred
				1447	* since they do not cause fragmentation in the page allocator. Larger
				1448	* objects may have problems with order 0 because there may be too much
				1449	* space left unused in a slab. We go to a higher order if more than 1/8th
				1450	* of the slab would be wasted.
				1451	*
				1452	* In order to reach satisfactory performance we must ensure that
				1453	* a minimum number of objects is in one slab. Otherwise we may
				1454	* generate too much activity on the partial lists. This is less a
				1455	* concern for large slabs though. slub_max_order specifies the order
				1456	* where we begin to stop considering the number of objects in a slab.
				1457	*
				1458	* Higher order allocations also allow the placement of more objects
				1459	* in a slab and thereby reduce object handling overhead. If the user
				1460	* has requested a higher mininum order then we start with that one
				1461	* instead of zero.
				1462	*/
				1463	static int calculate_order(int size)
				1464	{
				1465	int order;
				1466	int rem;
				1467
				1468	for (order = max(slub_min_order, fls(size - 1) - PAGE_SHIFT);
				1469	order < MAX_ORDER; order++) {
				1470	unsigned long slab_size = PAGE_SIZE << order;
				1471
				1472	if (slub_max_order > order &&
				1473	slab_size < slub_min_objects * size)
				1474	continue;
				1475
				1476	if (slab_size < size)
				1477	continue;
				1478
				1479	rem = slab_size % size;
				1480
				1481	if (rem <= (PAGE_SIZE << order) / 8)
				1482	break;
				1483
				1484	}
				1485	if (order >= MAX_ORDER)
				1486	return -E2BIG;
				1487	return order;
				1488	}
				1489
				1490	/*
				1491	* Function to figure out which alignment to use from the
				1492	* various ways of specifying it.
				1493	*/
				1494	static unsigned long calculate_alignment(unsigned long flags,
				1495	unsigned long align, unsigned long size)
				1496	{
				1497	/*
				1498	* If the user wants hardware cache aligned objects then
				1499	* follow that suggestion if the object is sufficiently
				1500	* large.
				1501	*
				1502	* The hardware cache alignment cannot override the
				1503	* specified alignment though. If that is greater
				1504	* then use it.
				1505	*/
				1506	if ((flags & (SLAB_MUST_HWCACHE_ALIGN \| SLAB_HWCACHE_ALIGN)) &&
				1507	size > L1_CACHE_BYTES / 2)
				1508	return max_t(unsigned long, align, L1_CACHE_BYTES);
				1509
				1510	if (align < ARCH_SLAB_MINALIGN)
				1511	return ARCH_SLAB_MINALIGN;
				1512
				1513	return ALIGN(align, sizeof(void *));
				1514	}
				1515
				1516	static void init_kmem_cache_node(struct kmem_cache_node *n)
				1517	{
				1518	n->nr_partial = 0;
				1519	atomic_long_set(&n->nr_slabs, 0);
				1520	spin_lock_init(&n->list_lock);
				1521	INIT_LIST_HEAD(&n->partial);
Christoph Lameter	643b113	2007-05-06 14:49:42 -0700	[diff] [blame]	1522	INIT_LIST_HEAD(&n->full);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1523	}
				1524
				1525	#ifdef CONFIG_NUMA
				1526	/*
				1527	* No kmalloc_node yet so do it by hand. We know that this is the first
				1528	* slab on the node for this slabcache. There are no concurrent accesses
				1529	* possible.
				1530	*
				1531	* Note that this function only works on the kmalloc_node_cache
				1532	* when allocating for the kmalloc_node_cache.
				1533	*/
				1534	static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags,
				1535	int node)
				1536	{
				1537	struct page *page;
				1538	struct kmem_cache_node *n;
				1539
				1540	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
				1541
				1542	page = new_slab(kmalloc_caches, gfpflags \| GFP_THISNODE, node);
				1543	/* new_slab() disables interupts */
				1544	local_irq_enable();
				1545
				1546	BUG_ON(!page);
				1547	n = page->freelist;
				1548	BUG_ON(!n);
				1549	page->freelist = get_freepointer(kmalloc_caches, n);
				1550	page->inuse++;
				1551	kmalloc_caches->node[node] = n;
				1552	init_object(kmalloc_caches, n, 1);
				1553	init_kmem_cache_node(n);
				1554	atomic_long_inc(&n->nr_slabs);
Christoph Lameter	e95eed5	2007-05-06 14:49:44 -0700	[diff] [blame^]	1555	add_partial(n, page);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	1556	return n;
				1557	}
				1558
				1559	static void free_kmem_cache_nodes(struct kmem_cache *s)
				1560	{
				1561	int node;
				1562
				1563	for_each_online_node(node) {
				1564	struct kmem_cache_node *n = s->node[node];
				1565	if (n && n != &s->local_node)
				1566	kmem_cache_free(kmalloc_caches, n);
				1567	s->node[node] = NULL;
				1568	}
				1569	}
				1570
				1571	static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
				1572	{
				1573	int node;
				1574	int local_node;
				1575
				1576	if (slab_state >= UP)
				1577	local_node = page_to_nid(virt_to_page(s));
				1578	else
				1579	local_node = 0;
				1580
				1581	for_each_online_node(node) {
				1582	struct kmem_cache_node *n;
				1583
				1584	if (local_node == node)
				1585	n = &s->local_node;
				1586	else {
				1587	if (slab_state == DOWN) {
				1588	n = early_kmem_cache_node_alloc(gfpflags,
				1589	node);
				1590	continue;
				1591	}
				1592	n = kmem_cache_alloc_node(kmalloc_caches,
				1593	gfpflags, node);
				1594
				1595	if (!n) {
				1596	free_kmem_cache_nodes(s);
				1597	return 0;
				1598	}
				1599
				1600	}
				1601	s->node[node] = n;
				1602	init_kmem_cache_node(n);
				1603	}
				1604	return 1;
				1605	}
				1606	#else
				1607	static void free_kmem_cache_nodes(struct kmem_cache *s)
				1608	{
				1609	}
				1610
				1611	static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
				1612	{
				1613	init_kmem_cache_node(&s->local_node);
				1614	return 1;
				1615	}
				1616	#endif
				1617
				1618	/*
				1619	* calculate_sizes() determines the order and the distribution of data within
				1620	* a slab object.
				1621	*/
				1622	static int calculate_sizes(struct kmem_cache *s)
				1623	{
				1624	unsigned long flags = s->flags;
				1625	unsigned long size = s->objsize;
				1626	unsigned long align = s->align;
				1627
				1628	/*
				1629	* Determine if we can poison the object itself. If the user of
				1630	* the slab may touch the object after free or before allocation
				1631	* then we should never poison the object itself.
				1632	*/
				1633	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
				1634	!s->ctor && !s->dtor)
				1635	s->flags \|= __OBJECT_POISON;
				1636	else
				1637	s->flags &= ~__OBJECT_POISON;
				1638
				1639	/*
				1640	* Round up object size to the next word boundary. We can only
				1641	* place the free pointer at word boundaries and this determines
				1642	* the possible location of the free pointer.
				1643	*/
				1644	size = ALIGN(size, sizeof(void *));
				1645
				1646	/*
				1647	* If we are redzoning then check if there is some space between the
				1648	* end of the object and the free pointer. If not then add an
				1649	* additional word, so that we can establish a redzone between
				1650	* the object and the freepointer to be able to check for overwrites.
				1651	*/
				1652	if ((flags & SLAB_RED_ZONE) && size == s->objsize)
				1653	size += sizeof(void *);
				1654
				1655	/*
				1656	* With that we have determined how much of the slab is in actual
				1657	* use by the object. This is the potential offset to the free
				1658	* pointer.
				1659	*/
				1660	s->inuse = size;
				1661
				1662	if (((flags & (SLAB_DESTROY_BY_RCU \| SLAB_POISON)) \|\|
				1663	s->ctor \|\| s->dtor)) {
				1664	/*
				1665	* Relocate free pointer after the object if it is not
				1666	* permitted to overwrite the first word of the object on
				1667	* kmem_cache_free.
				1668	*
				1669	* This is the case if we do RCU, have a constructor or
				1670	* destructor or are poisoning the objects.
				1671	*/
				1672	s->offset = size;
				1673	size += sizeof(void *);
				1674	}
				1675
				1676	if (flags & SLAB_STORE_USER)
				1677	/*
				1678	* Need to store information about allocs and frees after
				1679	* the object.
				1680	*/
				1681	size += 2 * sizeof(struct track);
				1682
				1683	if (flags & DEBUG_DEFAULT_FLAGS)
				1684	/*
				1685	* Add some empty padding so that we can catch
				1686	* overwrites from earlier objects rather than let
				1687	* tracking information or the free pointer be
				1688	* corrupted if an user writes before the start
				1689	* of the object.
				1690	*/
				1691	size += sizeof(void *);
				1692	/*
				1693	* Determine the alignment based on various parameters that the
				1694	* user specified (this is unecessarily complex due to the attempt
				1695	* to be compatible with SLAB. Should be cleaned up some day).
				1696	*/
				1697	align = calculate_alignment(flags, align, s->objsize);
				1698
				1699	/*
				1700	* SLUB stores one object immediately after another beginning from
				1701	* offset 0. In order to align the objects we have to simply size
				1702	* each object to conform to the alignment.
				1703	*/
				1704	size = ALIGN(size, align);
				1705	s->size = size;
				1706
				1707	s->order = calculate_order(size);
				1708	if (s->order < 0)
				1709	return 0;
				1710
				1711	/*
				1712	* Determine the number of objects per slab
				1713	*/
				1714	s->objects = (PAGE_SIZE << s->order) / size;
				1715
				1716	/*
				1717	* Verify that the number of objects is within permitted limits.
				1718	* The page->inuse field is only 16 bit wide! So we cannot have
				1719	* more than 64k objects per slab.
				1720	*/
				1721	if (!s->objects \|\| s->objects > 65535)
				1722	return 0;
				1723	return 1;
				1724
				1725	}
				1726
				1727	static int __init finish_bootstrap(void)
				1728	{
				1729	struct list_head *h;
				1730	int err;
				1731
				1732	slab_state = SYSFS;
				1733
				1734	list_for_each(h, &slab_caches) {
				1735	struct kmem_cache *s =
				1736	container_of(h, struct kmem_cache, list);
				1737
				1738	err = sysfs_slab_add(s);
				1739	BUG_ON(err);
				1740	}
				1741	return 0;
				1742	}
				1743
				1744	static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
				1745	const char *name, size_t size,
				1746	size_t align, unsigned long flags,
				1747	void (ctor)(void , struct kmem_cache *, unsigned long),
				1748	void (dtor)(void , struct kmem_cache *, unsigned long))
				1749	{
				1750	memset(s, 0, kmem_size);
				1751	s->name = name;
				1752	s->ctor = ctor;
				1753	s->dtor = dtor;
				1754	s->objsize = size;
				1755	s->flags = flags;
				1756	s->align = align;
				1757
				1758	BUG_ON(flags & SLUB_UNIMPLEMENTED);
				1759
				1760	/*
				1761	* The page->offset field is only 16 bit wide. This is an offset
				1762	* in units of words from the beginning of an object. If the slab
				1763	* size is bigger then we cannot move the free pointer behind the
				1764	* object anymore.
				1765	*
				1766	* On 32 bit platforms the limit is 256k. On 64bit platforms
				1767	* the limit is 512k.
				1768	*
				1769	* Debugging or ctor/dtors may create a need to move the free
				1770	* pointer. Fail if this happens.
				1771	*/
				1772	if (s->size >= 65535 * sizeof(void *)) {
				1773	BUG_ON(flags & (SLAB_RED_ZONE \| SLAB_POISON \|
				1774	SLAB_STORE_USER \| SLAB_DESTROY_BY_RCU));
				1775	BUG_ON(ctor \|\| dtor);
				1776	}
				1777	else
				1778	/*
				1779	* Enable debugging if selected on the kernel commandline.
				1780	*/
				1781	if (slub_debug && (!slub_debug_slabs \|\|
				1782	strncmp(slub_debug_slabs, name,
				1783	strlen(slub_debug_slabs)) == 0))
				1784	s->flags \|= slub_debug;
				1785
				1786	if (!calculate_sizes(s))
				1787	goto error;
				1788
				1789	s->refcount = 1;
				1790	#ifdef CONFIG_NUMA
				1791	s->defrag_ratio = 100;
				1792	#endif
				1793
				1794	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
				1795	return 1;
				1796	error:
				1797	if (flags & SLAB_PANIC)
				1798	panic("Cannot create slab %s size=%lu realsize=%u "
				1799	"order=%u offset=%u flags=%lx\n",
				1800	s->name, (unsigned long)size, s->size, s->order,
				1801	s->offset, flags);
				1802	return 0;
				1803	}
				1804	EXPORT_SYMBOL(kmem_cache_open);
				1805
				1806	/*
				1807	* Check if a given pointer is valid
				1808	*/
				1809	int kmem_ptr_validate(struct kmem_cache s, const void object)
				1810	{
				1811	struct page * page;
				1812	void *addr;
				1813
				1814	page = get_object_page(object);
				1815
				1816	if (!page \|\| s != page->slab)
				1817	/* No slab or wrong slab */
				1818	return 0;
				1819
				1820	addr = page_address(page);
				1821	if (object < addr \|\| object >= addr + s->objects * s->size)
				1822	/* Out of bounds */
				1823	return 0;
				1824
				1825	if ((object - addr) % s->size)
				1826	/* Improperly aligned */
				1827	return 0;
				1828
				1829	/*
				1830	* We could also check if the object is on the slabs freelist.
				1831	* But this would be too expensive and it seems that the main
				1832	* purpose of kmem_ptr_valid is to check if the object belongs
				1833	* to a certain slab.
				1834	*/
				1835	return 1;
				1836	}
				1837	EXPORT_SYMBOL(kmem_ptr_validate);
				1838
				1839	/*
				1840	* Determine the size of a slab object
				1841	*/
				1842	unsigned int kmem_cache_size(struct kmem_cache *s)
				1843	{
				1844	return s->objsize;
				1845	}
				1846	EXPORT_SYMBOL(kmem_cache_size);
				1847
				1848	const char kmem_cache_name(struct kmem_cache s)
				1849	{
				1850	return s->name;
				1851	}
				1852	EXPORT_SYMBOL(kmem_cache_name);
				1853
				1854	/*
				1855	* Attempt to free all slabs on a node
				1856	*/
				1857	static int free_list(struct kmem_cache s, struct kmem_cache_node n,
				1858	struct list_head *list)
				1859	{
				1860	int slabs_inuse = 0;
				1861	unsigned long flags;
				1862	struct page page, h;
				1863
				1864	spin_lock_irqsave(&n->list_lock, flags);
				1865	list_for_each_entry_safe(page, h, list, lru)
				1866	if (!page->inuse) {
				1867	list_del(&page->lru);
				1868	discard_slab(s, page);
				1869	} else
				1870	slabs_inuse++;
				1871	spin_unlock_irqrestore(&n->list_lock, flags);
				1872	return slabs_inuse;
				1873	}
				1874
				1875	/*
				1876	* Release all resources used by slab cache
				1877	*/
				1878	static int kmem_cache_close(struct kmem_cache *s)
				1879	{
				1880	int node;
				1881
				1882	flush_all(s);
				1883
				1884	/* Attempt to free all objects */
				1885	for_each_online_node(node) {
				1886	struct kmem_cache_node *n = get_node(s, node);
				1887
				1888	free_list(s, n, &n->partial);
				1889	if (atomic_long_read(&n->nr_slabs))
				1890	return 1;
				1891	}
				1892	free_kmem_cache_nodes(s);
				1893	return 0;
				1894	}
				1895
				1896	/*
				1897	* Close a cache and release the kmem_cache structure
				1898	* (must be used for caches created using kmem_cache_create)
				1899	*/
				1900	void kmem_cache_destroy(struct kmem_cache *s)
				1901	{
				1902	down_write(&slub_lock);
				1903	s->refcount--;
				1904	if (!s->refcount) {
				1905	list_del(&s->list);
				1906	if (kmem_cache_close(s))
				1907	WARN_ON(1);
				1908	sysfs_slab_remove(s);
				1909	kfree(s);
				1910	}
				1911	up_write(&slub_lock);
				1912	}
				1913	EXPORT_SYMBOL(kmem_cache_destroy);
				1914
				1915	/********************************************************************
				1916	* Kmalloc subsystem
				1917	*******************************************************************/
				1918
				1919	struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned;
				1920	EXPORT_SYMBOL(kmalloc_caches);
				1921
				1922	#ifdef CONFIG_ZONE_DMA
				1923	static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1];
				1924	#endif
				1925
				1926	static int __init setup_slub_min_order(char *str)
				1927	{
				1928	get_option (&str, &slub_min_order);
				1929
				1930	return 1;
				1931	}
				1932
				1933	__setup("slub_min_order=", setup_slub_min_order);
				1934
				1935	static int __init setup_slub_max_order(char *str)
				1936	{
				1937	get_option (&str, &slub_max_order);
				1938
				1939	return 1;
				1940	}
				1941
				1942	__setup("slub_max_order=", setup_slub_max_order);
				1943
				1944	static int __init setup_slub_min_objects(char *str)
				1945	{
				1946	get_option (&str, &slub_min_objects);
				1947
				1948	return 1;
				1949	}
				1950
				1951	__setup("slub_min_objects=", setup_slub_min_objects);
				1952
				1953	static int __init setup_slub_nomerge(char *str)
				1954	{
				1955	slub_nomerge = 1;
				1956	return 1;
				1957	}
				1958
				1959	__setup("slub_nomerge", setup_slub_nomerge);
				1960
				1961	static int __init setup_slub_debug(char *str)
				1962	{
				1963	if (!str \|\| *str != '=')
				1964	slub_debug = DEBUG_DEFAULT_FLAGS;
				1965	else {
				1966	str++;
				1967	if (str == 0 \|\| str == ',')
				1968	slub_debug = DEBUG_DEFAULT_FLAGS;
				1969	else
				1970	for( ;str && str != ','; str++)
				1971	switch (*str) {
				1972	case 'f' : case 'F' :
				1973	slub_debug \|= SLAB_DEBUG_FREE;
				1974	break;
				1975	case 'z' : case 'Z' :
				1976	slub_debug \|= SLAB_RED_ZONE;
				1977	break;
				1978	case 'p' : case 'P' :
				1979	slub_debug \|= SLAB_POISON;
				1980	break;
				1981	case 'u' : case 'U' :
				1982	slub_debug \|= SLAB_STORE_USER;
				1983	break;
				1984	case 't' : case 'T' :
				1985	slub_debug \|= SLAB_TRACE;
				1986	break;
				1987	default:
				1988	printk(KERN_ERR "slub_debug option '%c' "
				1989	"unknown. skipped\n",*str);
				1990	}
				1991	}
				1992
				1993	if (*str == ',')
				1994	slub_debug_slabs = str + 1;
				1995	return 1;
				1996	}
				1997
				1998	__setup("slub_debug", setup_slub_debug);
				1999
				2000	static struct kmem_cache create_kmalloc_cache(struct kmem_cache s,
				2001	const char *name, int size, gfp_t gfp_flags)
				2002	{
				2003	unsigned int flags = 0;
				2004
				2005	if (gfp_flags & SLUB_DMA)
				2006	flags = SLAB_CACHE_DMA;
				2007
				2008	down_write(&slub_lock);
				2009	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
				2010	flags, NULL, NULL))
				2011	goto panic;
				2012
				2013	list_add(&s->list, &slab_caches);
				2014	up_write(&slub_lock);
				2015	if (sysfs_slab_add(s))
				2016	goto panic;
				2017	return s;
				2018
				2019	panic:
				2020	panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
				2021	}
				2022
				2023	static struct kmem_cache *get_slab(size_t size, gfp_t flags)
				2024	{
				2025	int index = kmalloc_index(size);
				2026
Christoph Lameter	614410d	2007-05-06 14:49:38 -0700	[diff] [blame]	2027	if (!index)
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2028	return NULL;
				2029
				2030	/* Allocation too large? */
				2031	BUG_ON(index < 0);
				2032
				2033	#ifdef CONFIG_ZONE_DMA
				2034	if ((flags & SLUB_DMA)) {
				2035	struct kmem_cache *s;
				2036	struct kmem_cache *x;
				2037	char *text;
				2038	size_t realsize;
				2039
				2040	s = kmalloc_caches_dma[index];
				2041	if (s)
				2042	return s;
				2043
				2044	/* Dynamically create dma cache */
				2045	x = kmalloc(kmem_size, flags & ~SLUB_DMA);
				2046	if (!x)
				2047	panic("Unable to allocate memory for dma cache\n");
				2048
				2049	if (index <= KMALLOC_SHIFT_HIGH)
				2050	realsize = 1 << index;
				2051	else {
				2052	if (index == 1)
				2053	realsize = 96;
				2054	else
				2055	realsize = 192;
				2056	}
				2057
				2058	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
				2059	(unsigned int)realsize);
				2060	s = create_kmalloc_cache(x, text, realsize, flags);
				2061	kmalloc_caches_dma[index] = s;
				2062	return s;
				2063	}
				2064	#endif
				2065	return &kmalloc_caches[index];
				2066	}
				2067
				2068	void *__kmalloc(size_t size, gfp_t flags)
				2069	{
				2070	struct kmem_cache *s = get_slab(size, flags);
				2071
				2072	if (s)
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2073	return slab_alloc(s, flags, -1, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2074	return NULL;
				2075	}
				2076	EXPORT_SYMBOL(__kmalloc);
				2077
				2078	#ifdef CONFIG_NUMA
				2079	void *__kmalloc_node(size_t size, gfp_t flags, int node)
				2080	{
				2081	struct kmem_cache *s = get_slab(size, flags);
				2082
				2083	if (s)
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2084	return slab_alloc(s, flags, node, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2085	return NULL;
				2086	}
				2087	EXPORT_SYMBOL(__kmalloc_node);
				2088	#endif
				2089
				2090	size_t ksize(const void *object)
				2091	{
				2092	struct page *page = get_object_page(object);
				2093	struct kmem_cache *s;
				2094
				2095	BUG_ON(!page);
				2096	s = page->slab;
				2097	BUG_ON(!s);
				2098
				2099	/*
				2100	* Debugging requires use of the padding between object
				2101	* and whatever may come after it.
				2102	*/
				2103	if (s->flags & (SLAB_RED_ZONE \| SLAB_POISON))
				2104	return s->objsize;
				2105
				2106	/*
				2107	* If we have the need to store the freelist pointer
				2108	* back there or track user information then we can
				2109	* only use the space before that information.
				2110	*/
				2111	if (s->flags & (SLAB_DESTROY_BY_RCU \| SLAB_STORE_USER))
				2112	return s->inuse;
				2113
				2114	/*
				2115	* Else we can use all the padding etc for the allocation
				2116	*/
				2117	return s->size;
				2118	}
				2119	EXPORT_SYMBOL(ksize);
				2120
				2121	void kfree(const void *x)
				2122	{
				2123	struct kmem_cache *s;
				2124	struct page *page;
				2125
				2126	if (!x)
				2127	return;
				2128
Christoph Lameter	b49af68	2007-05-06 14:49:41 -0700	[diff] [blame]	2129	page = virt_to_head_page(x);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2130	s = page->slab;
				2131
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2132	slab_free(s, page, (void *)x, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2133	}
				2134	EXPORT_SYMBOL(kfree);
				2135
				2136	/**
				2137	* krealloc - reallocate memory. The contents will remain unchanged.
				2138	*
				2139	* @p: object to reallocate memory for.
				2140	* @new_size: how many bytes of memory are required.
				2141	* @flags: the type of memory to allocate.
				2142	*
				2143	* The contents of the object pointed to are preserved up to the
				2144	* lesser of the new and old sizes. If @p is %NULL, krealloc()
				2145	* behaves exactly like kmalloc(). If @size is 0 and @p is not a
				2146	* %NULL pointer, the object pointed to is freed.
				2147	*/
				2148	void krealloc(const void p, size_t new_size, gfp_t flags)
				2149	{
				2150	struct kmem_cache *new_cache;
				2151	void *ret;
				2152	struct page *page;
				2153
				2154	if (unlikely(!p))
				2155	return kmalloc(new_size, flags);
				2156
				2157	if (unlikely(!new_size)) {
				2158	kfree(p);
				2159	return NULL;
				2160	}
				2161
Christoph Lameter	b49af68	2007-05-06 14:49:41 -0700	[diff] [blame]	2162	page = virt_to_head_page(p);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2163
				2164	new_cache = get_slab(new_size, flags);
				2165
				2166	/*
				2167	* If new size fits in the current cache, bail out.
				2168	*/
				2169	if (likely(page->slab == new_cache))
				2170	return (void *)p;
				2171
				2172	ret = kmalloc(new_size, flags);
				2173	if (ret) {
				2174	memcpy(ret, p, min(new_size, ksize(p)));
				2175	kfree(p);
				2176	}
				2177	return ret;
				2178	}
				2179	EXPORT_SYMBOL(krealloc);
				2180
				2181	/********************************************************************
				2182	* Basic setup of slabs
				2183	*******************************************************************/
				2184
				2185	void __init kmem_cache_init(void)
				2186	{
				2187	int i;
				2188
				2189	#ifdef CONFIG_NUMA
				2190	/*
				2191	* Must first have the slab cache available for the allocations of the
				2192	* struct kmalloc_cache_node's. There is special bootstrap code in
				2193	* kmem_cache_open for slab_state == DOWN.
				2194	*/
				2195	create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
				2196	sizeof(struct kmem_cache_node), GFP_KERNEL);
				2197	#endif
				2198
				2199	/* Able to allocate the per node structures */
				2200	slab_state = PARTIAL;
				2201
				2202	/* Caches that are not of the two-to-the-power-of size */
				2203	create_kmalloc_cache(&kmalloc_caches[1],
				2204	"kmalloc-96", 96, GFP_KERNEL);
				2205	create_kmalloc_cache(&kmalloc_caches[2],
				2206	"kmalloc-192", 192, GFP_KERNEL);
				2207
				2208	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
				2209	create_kmalloc_cache(&kmalloc_caches[i],
				2210	"kmalloc", 1 << i, GFP_KERNEL);
				2211
				2212	slab_state = UP;
				2213
				2214	/* Provide the correct kmalloc names now that the caches are up */
				2215	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
				2216	kmalloc_caches[i]. name =
				2217	kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
				2218
				2219	#ifdef CONFIG_SMP
				2220	register_cpu_notifier(&slab_notifier);
				2221	#endif
				2222
				2223	if (nr_cpu_ids) /* Remove when nr_cpu_ids is fixed upstream ! */
				2224	kmem_size = offsetof(struct kmem_cache, cpu_slab)
				2225	+ nr_cpu_ids * sizeof(struct page *);
				2226
				2227	printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
				2228	" Processors=%d, Nodes=%d\n",
				2229	KMALLOC_SHIFT_HIGH, L1_CACHE_BYTES,
				2230	slub_min_order, slub_max_order, slub_min_objects,
				2231	nr_cpu_ids, nr_node_ids);
				2232	}
				2233
				2234	/*
				2235	* Find a mergeable slab cache
				2236	*/
				2237	static int slab_unmergeable(struct kmem_cache *s)
				2238	{
				2239	if (slub_nomerge \|\| (s->flags & SLUB_NEVER_MERGE))
				2240	return 1;
				2241
				2242	if (s->ctor \|\| s->dtor)
				2243	return 1;
				2244
				2245	return 0;
				2246	}
				2247
				2248	static struct kmem_cache *find_mergeable(size_t size,
				2249	size_t align, unsigned long flags,
				2250	void (ctor)(void , struct kmem_cache *, unsigned long),
				2251	void (dtor)(void , struct kmem_cache *, unsigned long))
				2252	{
				2253	struct list_head *h;
				2254
				2255	if (slub_nomerge \|\| (flags & SLUB_NEVER_MERGE))
				2256	return NULL;
				2257
				2258	if (ctor \|\| dtor)
				2259	return NULL;
				2260
				2261	size = ALIGN(size, sizeof(void *));
				2262	align = calculate_alignment(flags, align, size);
				2263	size = ALIGN(size, align);
				2264
				2265	list_for_each(h, &slab_caches) {
				2266	struct kmem_cache *s =
				2267	container_of(h, struct kmem_cache, list);
				2268
				2269	if (slab_unmergeable(s))
				2270	continue;
				2271
				2272	if (size > s->size)
				2273	continue;
				2274
				2275	if (((flags \| slub_debug) & SLUB_MERGE_SAME) !=
				2276	(s->flags & SLUB_MERGE_SAME))
				2277	continue;
				2278	/*
				2279	* Check if alignment is compatible.
				2280	* Courtesy of Adrian Drzewiecki
				2281	*/
				2282	if ((s->size & ~(align -1)) != s->size)
				2283	continue;
				2284
				2285	if (s->size - size >= sizeof(void *))
				2286	continue;
				2287
				2288	return s;
				2289	}
				2290	return NULL;
				2291	}
				2292
				2293	struct kmem_cache kmem_cache_create(const char name, size_t size,
				2294	size_t align, unsigned long flags,
				2295	void (ctor)(void , struct kmem_cache *, unsigned long),
				2296	void (dtor)(void , struct kmem_cache *, unsigned long))
				2297	{
				2298	struct kmem_cache *s;
				2299
				2300	down_write(&slub_lock);
				2301	s = find_mergeable(size, align, flags, dtor, ctor);
				2302	if (s) {
				2303	s->refcount++;
				2304	/*
				2305	* Adjust the object sizes so that we clear
				2306	* the complete object on kzalloc.
				2307	*/
				2308	s->objsize = max(s->objsize, (int)size);
				2309	s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
				2310	if (sysfs_slab_alias(s, name))
				2311	goto err;
				2312	} else {
				2313	s = kmalloc(kmem_size, GFP_KERNEL);
				2314	if (s && kmem_cache_open(s, GFP_KERNEL, name,
				2315	size, align, flags, ctor, dtor)) {
				2316	if (sysfs_slab_add(s)) {
				2317	kfree(s);
				2318	goto err;
				2319	}
				2320	list_add(&s->list, &slab_caches);
				2321	} else
				2322	kfree(s);
				2323	}
				2324	up_write(&slub_lock);
				2325	return s;
				2326
				2327	err:
				2328	up_write(&slub_lock);
				2329	if (flags & SLAB_PANIC)
				2330	panic("Cannot create slabcache %s\n", name);
				2331	else
				2332	s = NULL;
				2333	return s;
				2334	}
				2335	EXPORT_SYMBOL(kmem_cache_create);
				2336
				2337	void kmem_cache_zalloc(struct kmem_cache s, gfp_t flags)
				2338	{
				2339	void *x;
				2340
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2341	x = slab_alloc(s, flags, -1, __builtin_return_address(0));
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2342	if (x)
				2343	memset(x, 0, s->objsize);
				2344	return x;
				2345	}
				2346	EXPORT_SYMBOL(kmem_cache_zalloc);
				2347
				2348	#ifdef CONFIG_SMP
				2349	static void for_all_slabs(void (func)(struct kmem_cache , int), int cpu)
				2350	{
				2351	struct list_head *h;
				2352
				2353	down_read(&slub_lock);
				2354	list_for_each(h, &slab_caches) {
				2355	struct kmem_cache *s =
				2356	container_of(h, struct kmem_cache, list);
				2357
				2358	func(s, cpu);
				2359	}
				2360	up_read(&slub_lock);
				2361	}
				2362
				2363	/*
				2364	* Use the cpu notifier to insure that the slab are flushed
				2365	* when necessary.
				2366	*/
				2367	static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
				2368	unsigned long action, void *hcpu)
				2369	{
				2370	long cpu = (long)hcpu;
				2371
				2372	switch (action) {
				2373	case CPU_UP_CANCELED:
				2374	case CPU_DEAD:
				2375	for_all_slabs(__flush_cpu_slab, cpu);
				2376	break;
				2377	default:
				2378	break;
				2379	}
				2380	return NOTIFY_OK;
				2381	}
				2382
				2383	static struct notifier_block __cpuinitdata slab_notifier =
				2384	{ &slab_cpuup_callback, NULL, 0 };
				2385
				2386	#endif
				2387
				2388	/***************************************************************
				2389	* Compatibility definitions
				2390	**************************************************************/
				2391
				/*
				 * Shrink a cache. All this does here is call flush_all() on it;
				 * the result is always 0.
				 */
				int kmem_cache_shrink(struct kmem_cache *s)
				{
					flush_all(s);

					return 0;
				}
				EXPORT_SYMBOL(kmem_cache_shrink);
				2398
				2399	#ifdef CONFIG_NUMA
				2400
				2401	/*****************************************************************
				2402	* Generic reaper used to support the page allocator
				2403	* (the cpu slabs are reaped by a per slab workqueue).
				2404	*
				2405	* Maybe move this to the page allocator?
				2406	****************************************************************/
				2407
				2408	static DEFINE_PER_CPU(unsigned long, reap_node);
				2409
				2410	static void init_reap_node(int cpu)
				2411	{
				2412	int node;
				2413
				2414	node = next_node(cpu_to_node(cpu), node_online_map);
				2415	if (node == MAX_NUMNODES)
				2416	node = first_node(node_online_map);
				2417
				2418	__get_cpu_var(reap_node) = node;
				2419	}
				2420
				2421	static void next_reap_node(void)
				2422	{
				2423	int node = __get_cpu_var(reap_node);
				2424
				2425	/*
				2426	* Also drain per cpu pages on remote zones
				2427	*/
				2428	if (node != numa_node_id())
				2429	drain_node_pages(node);
				2430
				2431	node = next_node(node, node_online_map);
				2432	if (unlikely(node >= MAX_NUMNODES))
				2433	node = first_node(node_online_map);
				2434	__get_cpu_var(reap_node) = node;
				2435	}
				2436	#else
				/* Without NUMA there is no remote node to reap: both are no-ops */
				#define init_reap_node(cpu) do { } while (0)
				#define next_reap_node(void) do { } while (0)
				2439	#endif
				2440
				/* Delay between two runs of cache_reap() on a cpu */
				#define REAPTIMEOUT_CPUC (2 * HZ)
				2442
				2443	#ifdef CONFIG_SMP
				2444	static DEFINE_PER_CPU(struct delayed_work, reap_work);
				2445
				2446	static void cache_reap(struct work_struct *unused)
				2447	{
				2448	next_reap_node();
				2449	refresh_cpu_vm_stats(smp_processor_id());
				2450	schedule_delayed_work(&__get_cpu_var(reap_work),
				2451	REAPTIMEOUT_CPUC);
				2452	}
				2453
				2454	static void __devinit start_cpu_timer(int cpu)
				2455	{
				2456	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
				2457
				2458	/*
				2459	* When this gets called from do_initcalls via cpucache_init(),
				2460	* init_workqueues() has already run, so keventd will be setup
				2461	* at that time.
				2462	*/
				2463	if (keventd_up() && reap_work->work.func == NULL) {
				2464	init_reap_node(cpu);
				2465	INIT_DELAYED_WORK(reap_work, cache_reap);
				2466	schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
				2467	}
				2468	}
				2469
				2470	static int __init cpucache_init(void)
				2471	{
				2472	int cpu;
				2473
				2474	/*
				2475	* Register the timers that drain pcp pages and update vm statistics
				2476	*/
				2477	for_each_online_cpu(cpu)
				2478	start_cpu_timer(cpu);
				2479	return 0;
				2480	}
				2481	__initcall(cpucache_init);
				2482	#endif
				2483
				2484	#ifdef SLUB_RESILIENCY_TEST
				2485	static unsigned long validate_slab_cache(struct kmem_cache *s);
				2486
				2487	static void resiliency_test(void)
				2488	{
				2489	u8 *p;
				2490
				2491	printk(KERN_ERR "SLUB resiliency testing\n");
				2492	printk(KERN_ERR "-----------------------\n");
				2493	printk(KERN_ERR "A. Corruption after allocation\n");
				2494
				2495	p = kzalloc(16, GFP_KERNEL);
				2496	p[16] = 0x12;
				2497	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
				2498	" 0x12->0x%p\n\n", p + 16);
				2499
				2500	validate_slab_cache(kmalloc_caches + 4);
				2501
				2502	/* Hmmm... The next two are dangerous */
				2503	p = kzalloc(32, GFP_KERNEL);
				2504	p[32 + sizeof(void *)] = 0x34;
				2505	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
				2506	" 0x34 -> -0x%p\n", p);
				2507	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
				2508
				2509	validate_slab_cache(kmalloc_caches + 5);
				2510	p = kzalloc(64, GFP_KERNEL);
				2511	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
				2512	*p = 0x56;
				2513	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
				2514	p);
				2515	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
				2516	validate_slab_cache(kmalloc_caches + 6);
				2517
				2518	printk(KERN_ERR "\nB. Corruption after free\n");
				2519	p = kzalloc(128, GFP_KERNEL);
				2520	kfree(p);
				2521	*p = 0x78;
				2522	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
				2523	validate_slab_cache(kmalloc_caches + 7);
				2524
				2525	p = kzalloc(256, GFP_KERNEL);
				2526	kfree(p);
				2527	p[50] = 0x9a;
				2528	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
				2529	validate_slab_cache(kmalloc_caches + 8);
				2530
				2531	p = kzalloc(512, GFP_KERNEL);
				2532	kfree(p);
				2533	p[512] = 0xab;
				2534	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
				2535	validate_slab_cache(kmalloc_caches + 9);
				2536	}
				2537	#else
				/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined */
				static void resiliency_test(void) {}
				2539	#endif
				2540
				2541	/*
				2542	* These are not as efficient as kmalloc for the non debug case.
				2543	* We do not have the page struct available so we have to touch one
				2544	* cacheline in struct kmem_cache to check slab flags.
				2545	*/
				2546	void __kmalloc_track_caller(size_t size, gfp_t gfpflags, void caller)
				2547	{
				2548	struct kmem_cache *s = get_slab(size, gfpflags);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2549
				2550	if (!s)
				2551	return NULL;
				2552
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2553	return slab_alloc(s, gfpflags, -1, caller);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2554	}
				2555
				2556	void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
				2557	int node, void *caller)
				2558	{
				2559	struct kmem_cache *s = get_slab(size, gfpflags);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2560
				2561	if (!s)
				2562	return NULL;
				2563
Christoph Lameter	77c5e2d	2007-05-06 14:49:42 -0700	[diff] [blame]	2564	return slab_alloc(s, gfpflags, node, caller);
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2565	}
				2566
				2567	#ifdef CONFIG_SYSFS
				2568
Christoph Lameter	53e15af	2007-05-06 14:49:43 -0700	[diff] [blame]	2569	static int validate_slab(struct kmem_cache s, struct page page)
				2570	{
				2571	void *p;
				2572	void *addr = page_address(page);
				2573	unsigned long map[BITS_TO_LONGS(s->objects)];
				2574
				2575	if (!check_slab(s, page) \|\|
				2576	!on_freelist(s, page, NULL))
				2577	return 0;
				2578
				2579	/* Now we know that a valid freelist exists */
				2580	bitmap_zero(map, s->objects);
				2581
				2582	for(p = page->freelist; p; p = get_freepointer(s, p)) {
				2583	set_bit((p - addr) / s->size, map);
				2584	if (!check_object(s, page, p, 0))
				2585	return 0;
				2586	}
				2587
				2588	for(p = addr; p < addr + s->objects * s->size; p += s->size)
				2589	if (!test_bit((p - addr) / s->size, map))
				2590	if (!check_object(s, page, p, 1))
				2591	return 0;
				2592	return 1;
				2593	}
				2594
				2595	static void validate_slab_slab(struct kmem_cache s, struct page page)
				2596	{
				2597	if (slab_trylock(page)) {
				2598	validate_slab(s, page);
				2599	slab_unlock(page);
				2600	} else
				2601	printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
				2602	s->name, page);
				2603
				2604	if (s->flags & DEBUG_DEFAULT_FLAGS) {
				2605	if (!PageError(page))
				2606	printk(KERN_ERR "SLUB %s: PageError not set "
				2607	"on slab 0x%p\n", s->name, page);
				2608	} else {
				2609	if (PageError(page))
				2610	printk(KERN_ERR "SLUB %s: PageError set on "
				2611	"slab 0x%p\n", s->name, page);
				2612	}
				2613	}
				2614
				2615	static int validate_slab_node(struct kmem_cache s, struct kmem_cache_node n)
				2616	{
				2617	unsigned long count = 0;
				2618	struct page *page;
				2619	unsigned long flags;
				2620
				2621	spin_lock_irqsave(&n->list_lock, flags);
				2622
				2623	list_for_each_entry(page, &n->partial, lru) {
				2624	validate_slab_slab(s, page);
				2625	count++;
				2626	}
				2627	if (count != n->nr_partial)
				2628	printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
				2629	"counter=%ld\n", s->name, count, n->nr_partial);
				2630
				2631	if (!(s->flags & SLAB_STORE_USER))
				2632	goto out;
				2633
				2634	list_for_each_entry(page, &n->full, lru) {
				2635	validate_slab_slab(s, page);
				2636	count++;
				2637	}
				2638	if (count != atomic_long_read(&n->nr_slabs))
				2639	printk(KERN_ERR "SLUB: %s %ld slabs counted but "
				2640	"counter=%ld\n", s->name, count,
				2641	atomic_long_read(&n->nr_slabs));
				2642
				2643	out:
				2644	spin_unlock_irqrestore(&n->list_lock, flags);
				2645	return count;
				2646	}
				2647
				2648	static unsigned long validate_slab_cache(struct kmem_cache *s)
				2649	{
				2650	int node;
				2651	unsigned long count = 0;
				2652
				2653	flush_all(s);
				2654	for_each_online_node(node) {
				2655	struct kmem_cache_node *n = get_node(s, node);
				2656
				2657	count += validate_slab_node(s, n);
				2658	}
				2659	return count;
				2660	}
				2661
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2662	static unsigned long count_partial(struct kmem_cache_node *n)
				2663	{
				2664	unsigned long flags;
				2665	unsigned long x = 0;
				2666	struct page *page;
				2667
				2668	spin_lock_irqsave(&n->list_lock, flags);
				2669	list_for_each_entry(page, &n->partial, lru)
				2670	x += page->inuse;
				2671	spin_unlock_irqrestore(&n->list_lock, flags);
				2672	return x;
				2673	}
				2674
				/* Bit positions of the selectors understood by slab_objects() */
				enum slab_stat_type {
					SL_FULL,
					SL_PARTIAL,
					SL_CPU,
					SL_OBJECTS
				};

				/* The selectors as bit masks, to be or-ed together by the callers */
				#define SO_FULL (1 << SL_FULL)
				#define SO_PARTIAL (1 << SL_PARTIAL)
				#define SO_CPU (1 << SL_CPU)
				#define SO_OBJECTS (1 << SL_OBJECTS)
				2686
				2687	static unsigned long slab_objects(struct kmem_cache *s,
				2688	char *buf, unsigned long flags)
				2689	{
				2690	unsigned long total = 0;
				2691	int cpu;
				2692	int node;
				2693	int x;
				2694	unsigned long *nodes;
				2695	unsigned long *per_cpu;
				2696
				2697	nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
				2698	per_cpu = nodes + nr_node_ids;
				2699
				2700	for_each_possible_cpu(cpu) {
				2701	struct page *page = s->cpu_slab[cpu];
				2702	int node;
				2703
				2704	if (page) {
				2705	node = page_to_nid(page);
				2706	if (flags & SO_CPU) {
				2707	int x = 0;
				2708
				2709	if (flags & SO_OBJECTS)
				2710	x = page->inuse;
				2711	else
				2712	x = 1;
				2713	total += x;
				2714	nodes[node] += x;
				2715	}
				2716	per_cpu[node]++;
				2717	}
				2718	}
				2719
				2720	for_each_online_node(node) {
				2721	struct kmem_cache_node *n = get_node(s, node);
				2722
				2723	if (flags & SO_PARTIAL) {
				2724	if (flags & SO_OBJECTS)
				2725	x = count_partial(n);
				2726	else
				2727	x = n->nr_partial;
				2728	total += x;
				2729	nodes[node] += x;
				2730	}
				2731
				2732	if (flags & SO_FULL) {
				2733	int full_slabs = atomic_read(&n->nr_slabs)
				2734	- per_cpu[node]
				2735	- n->nr_partial;
				2736
				2737	if (flags & SO_OBJECTS)
				2738	x = full_slabs * s->objects;
				2739	else
				2740	x = full_slabs;
				2741	total += x;
				2742	nodes[node] += x;
				2743	}
				2744	}
				2745
				2746	x = sprintf(buf, "%lu", total);
				2747	#ifdef CONFIG_NUMA
				2748	for_each_online_node(node)
				2749	if (nodes[node])
				2750	x += sprintf(buf + x, " N%d=%lu",
				2751	node, nodes[node]);
				2752	#endif
				2753	kfree(nodes);
				2754	return x + sprintf(buf + x, "\n");
				2755	}
				2756
				2757	static int any_slab_objects(struct kmem_cache *s)
				2758	{
				2759	int node;
				2760	int cpu;
				2761
				2762	for_each_possible_cpu(cpu)
				2763	if (s->cpu_slab[cpu])
				2764	return 1;
				2765
				2766	for_each_node(node) {
				2767	struct kmem_cache_node *n = get_node(s, node);
				2768
				2769	if (n->nr_partial \|\| atomic_read(&n->nr_slabs))
				2770	return 1;
				2771	}
				2772	return 0;
				2773	}
				2774
				/* Get from an embedded attribute / kobject to the enclosing structure */
				#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
				#define to_slab(n) container_of(n, struct kmem_cache, kobj)
				2777
				2778	struct slab_attribute {
				2779	struct attribute attr;
				2780	ssize_t (show)(struct kmem_cache s, char *buf);
				2781	ssize_t (store)(struct kmem_cache s, const char *x, size_t count);
				2782	};
				2783
				/* Define <name>_attr as a read-only attribute (built with __ATTR_RO) */
				#define SLAB_ATTR_RO(_name) \
					static struct slab_attribute _name##_attr = __ATTR_RO(_name)

				/* Define <name>_attr with mode 0644, using <name>_show and <name>_store */
				#define SLAB_ATTR(_name) \
					static struct slab_attribute _name##_attr = \
					__ATTR(_name, 0644, _name##_show, _name##_store)
				2790
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	2791	static ssize_t slab_size_show(struct kmem_cache s, char buf)
				2792	{
				2793	return sprintf(buf, "%d\n", s->size);
				2794	}
				2795	SLAB_ATTR_RO(slab_size);
				2796
				2797	static ssize_t align_show(struct kmem_cache s, char buf)
				2798	{
				2799	return sprintf(buf, "%d\n", s->align);
				2800	}
				2801	SLAB_ATTR_RO(align);
				2802
				2803	static ssize_t object_size_show(struct kmem_cache s, char buf)
				2804	{
				2805	return sprintf(buf, "%d\n", s->objsize);
				2806	}
				2807	SLAB_ATTR_RO(object_size);
				2808
				2809	static ssize_t objs_per_slab_show(struct kmem_cache s, char buf)
				2810	{
				2811	return sprintf(buf, "%d\n", s->objects);
				2812	}
				2813	SLAB_ATTR_RO(objs_per_slab);
				2814
				2815	static ssize_t order_show(struct kmem_cache s, char buf)
				2816	{
				2817	return sprintf(buf, "%d\n", s->order);
				2818	}
				2819	SLAB_ATTR_RO(order);
				2820
				2821	static ssize_t ctor_show(struct kmem_cache s, char buf)
				2822	{
				2823	if (s->ctor) {
				2824	int n = sprint_symbol(buf, (unsigned long)s->ctor);
				2825
				2826	return n + sprintf(buf + n, "\n");
				2827	}
				2828	return 0;
				2829	}
				2830	SLAB_ATTR_RO(ctor);
				2831
				2832	static ssize_t dtor_show(struct kmem_cache s, char buf)
				2833	{
				2834	if (s->dtor) {
				2835	int n = sprint_symbol(buf, (unsigned long)s->dtor);
				2836
				2837	return n + sprintf(buf + n, "\n");
				2838	}
				2839	return 0;
				2840	}
				2841	SLAB_ATTR_RO(dtor);
				2842
				2843	static ssize_t aliases_show(struct kmem_cache s, char buf)
				2844	{
				2845	return sprintf(buf, "%d\n", s->refcount - 1);
				2846	}
				2847	SLAB_ATTR_RO(aliases);
				2848
				2849	static ssize_t slabs_show(struct kmem_cache s, char buf)
				2850	{
				2851	return slab_objects(s, buf, SO_FULL\|SO_PARTIAL\|SO_CPU);
				2852	}
				2853	SLAB_ATTR_RO(slabs);
				2854
				2855	static ssize_t partial_show(struct kmem_cache s, char buf)
				2856	{
				2857	return slab_objects(s, buf, SO_PARTIAL);
				2858	}
				2859	SLAB_ATTR_RO(partial);
				2860
				2861	static ssize_t cpu_slabs_show(struct kmem_cache s, char buf)
				2862	{
				2863	return slab_objects(s, buf, SO_CPU);
				2864	}
				2865	SLAB_ATTR_RO(cpu_slabs);
				2866
				2867	static ssize_t objects_show(struct kmem_cache s, char buf)
				2868	{
				2869	return slab_objects(s, buf, SO_FULL\|SO_PARTIAL\|SO_CPU\|SO_OBJECTS);
				2870	}
				2871	SLAB_ATTR_RO(objects);
				2872
				2873	static ssize_t sanity_checks_show(struct kmem_cache s, char buf)
				2874	{
				2875	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
				2876	}
				2877
				2878	static ssize_t sanity_checks_store(struct kmem_cache *s,
				2879	const char *buf, size_t length)
				2880	{
				2881	s->flags &= ~SLAB_DEBUG_FREE;
				2882	if (buf[0] == '1')
				2883	s->flags \|= SLAB_DEBUG_FREE;
				2884	return length;
				2885	}
				2886	SLAB_ATTR(sanity_checks);
				2887
				2888	static ssize_t trace_show(struct kmem_cache s, char buf)
				2889	{
				2890	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
				2891	}
				2892
				2893	static ssize_t trace_store(struct kmem_cache s, const char buf,
				2894	size_t length)
				2895	{
				2896	s->flags &= ~SLAB_TRACE;
				2897	if (buf[0] == '1')
				2898	s->flags \|= SLAB_TRACE;
				2899	return length;
				2900	}
				2901	SLAB_ATTR(trace);
				2902
				2903	static ssize_t reclaim_account_show(struct kmem_cache s, char buf)
				2904	{
				2905	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
				2906	}
				2907
				2908	static ssize_t reclaim_account_store(struct kmem_cache *s,
				2909	const char *buf, size_t length)
				2910	{
				2911	s->flags &= ~SLAB_RECLAIM_ACCOUNT;
				2912	if (buf[0] == '1')
				2913	s->flags \|= SLAB_RECLAIM_ACCOUNT;
				2914	return length;
				2915	}
				2916	SLAB_ATTR(reclaim_account);
				2917
				2918	static ssize_t hwcache_align_show(struct kmem_cache s, char buf)
				2919	{
				2920	return sprintf(buf, "%d\n", !!(s->flags &
				2921	(SLAB_HWCACHE_ALIGN\|SLAB_MUST_HWCACHE_ALIGN)));
				2922	}
				2923	SLAB_ATTR_RO(hwcache_align);
				2924
				2925	#ifdef CONFIG_ZONE_DMA
				2926	static ssize_t cache_dma_show(struct kmem_cache s, char buf)
				2927	{
				2928	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
				2929	}
				2930	SLAB_ATTR_RO(cache_dma);
				2931	#endif
				2932
				2933	static ssize_t destroy_by_rcu_show(struct kmem_cache s, char buf)
				2934	{
				2935	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
				2936	}
				2937	SLAB_ATTR_RO(destroy_by_rcu);
				2938
				2939	static ssize_t red_zone_show(struct kmem_cache s, char buf)
				2940	{
				2941	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
				2942	}
				2943
				2944	static ssize_t red_zone_store(struct kmem_cache *s,
				2945	const char *buf, size_t length)
				2946	{
				2947	if (any_slab_objects(s))
				2948	return -EBUSY;
				2949
				2950	s->flags &= ~SLAB_RED_ZONE;
				2951	if (buf[0] == '1')
				2952	s->flags \|= SLAB_RED_ZONE;
				2953	calculate_sizes(s);
				2954	return length;
				2955	}
				2956	SLAB_ATTR(red_zone);
				2957
				2958	static ssize_t poison_show(struct kmem_cache s, char buf)
				2959	{
				2960	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
				2961	}
				2962
				2963	static ssize_t poison_store(struct kmem_cache *s,
				2964	const char *buf, size_t length)
				2965	{
				2966	if (any_slab_objects(s))
				2967	return -EBUSY;
				2968
				2969	s->flags &= ~SLAB_POISON;
				2970	if (buf[0] == '1')
				2971	s->flags \|= SLAB_POISON;
				2972	calculate_sizes(s);
				2973	return length;
				2974	}
				2975	SLAB_ATTR(poison);
				2976
				2977	static ssize_t store_user_show(struct kmem_cache s, char buf)
				2978	{
				2979	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
				2980	}
				2981
				2982	static ssize_t store_user_store(struct kmem_cache *s,
				2983	const char *buf, size_t length)
				2984	{
				2985	if (any_slab_objects(s))
				2986	return -EBUSY;
				2987
				2988	s->flags &= ~SLAB_STORE_USER;
				2989	if (buf[0] == '1')
				2990	s->flags \|= SLAB_STORE_USER;
				2991	calculate_sizes(s);
				2992	return length;
				2993	}
				2994	SLAB_ATTR(store_user);
				2995
Christoph Lameter	53e15af	2007-05-06 14:49:43 -0700	[diff] [blame]	2996	static ssize_t validate_show(struct kmem_cache s, char buf)
				2997	{
				2998	return 0;
				2999	}
				3000
				3001	static ssize_t validate_store(struct kmem_cache *s,
				3002	const char *buf, size_t length)
				3003	{
				3004	if (buf[0] == '1')
				3005	validate_slab_cache(s);
				3006	else
				3007	return -EINVAL;
				3008	return length;
				3009	}
				3010	SLAB_ATTR(validate);
				3011
#ifdef CONFIG_NUMA
/*
 * sysfs "defrag_ratio" attribute (read/write, only built with CONFIG_NUMA).
 *
 * The value is kept internally multiplied by ten; reading prints it
 * divided by ten again.
 */
static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
}

/*
 * Parse a decimal number from @buf and, if it is below 100, store it
 * (times ten) as the new ratio. Out-of-range input is ignored; the whole
 * input is always reported as consumed.
 */
static ssize_t defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	/*
	 * Keep the parsed value unsigned: squeezing it into an int would let
	 * a huge input wrap to a negative number, pass the range check and
	 * end up as a negative ratio.
	 */
	unsigned long ratio = simple_strtoul(buf, NULL, 10);

	if (ratio < 100)
		s->defrag_ratio = ratio * 10;
	return length;
}
SLAB_ATTR(defrag_ratio);
#endif
				3029
				3030	static struct attribute * slab_attrs[] = {
				3031	&slab_size_attr.attr,
				3032	&object_size_attr.attr,
				3033	&objs_per_slab_attr.attr,
				3034	&order_attr.attr,
				3035	&objects_attr.attr,
				3036	&slabs_attr.attr,
				3037	&partial_attr.attr,
				3038	&cpu_slabs_attr.attr,
				3039	&ctor_attr.attr,
				3040	&dtor_attr.attr,
				3041	&aliases_attr.attr,
				3042	&align_attr.attr,
				3043	&sanity_checks_attr.attr,
				3044	&trace_attr.attr,
				3045	&hwcache_align_attr.attr,
				3046	&reclaim_account_attr.attr,
				3047	&destroy_by_rcu_attr.attr,
				3048	&red_zone_attr.attr,
				3049	&poison_attr.attr,
				3050	&store_user_attr.attr,
Christoph Lameter	53e15af	2007-05-06 14:49:43 -0700	[diff] [blame]	3051	&validate_attr.attr,
Christoph Lameter	81819f0	2007-05-06 14:49:36 -0700	[diff] [blame]	3052	#ifdef CONFIG_ZONE_DMA
				3053	&cache_dma_attr.attr,
				3054	#endif
				3055	#ifdef CONFIG_NUMA
				3056	&defrag_ratio_attr.attr,
				3057	#endif
				3058	NULL
				3059	};
				3060
				3061	static struct attribute_group slab_attr_group = {
				3062	.attrs = slab_attrs,
				3063	};
				3064
				3065	static ssize_t slab_attr_show(struct kobject *kobj,
				3066	struct attribute *attr,
				3067	char *buf)
				3068	{
				3069	struct slab_attribute *attribute;
				3070	struct kmem_cache *s;
				3071	int err;
				3072
				3073	attribute = to_slab_attr(attr);
				3074	s = to_slab(kobj);
				3075
				3076	if (!attribute->show)
				3077	return -EIO;
				3078
				3079	err = attribute->show(s, buf);
				3080
				3081	return err;
				3082	}
				3083
				3084	static ssize_t slab_attr_store(struct kobject *kobj,
				3085	struct attribute *attr,
				3086	const char *buf, size_t len)
				3087	{
				3088	struct slab_attribute *attribute;
				3089	struct kmem_cache *s;
				3090	int err;
				3091
				3092	attribute = to_slab_attr(attr);
				3093	s = to_slab(kobj);
				3094
				3095	if (!attribute->store)
				3096	return -EIO;
				3097
				3098	err = attribute->store(s, buf, len);
				3099
				3100	return err;
				3101	}
				3102
				3103	static struct sysfs_ops slab_sysfs_ops = {
				3104	.show = slab_attr_show,
				3105	.store = slab_attr_store,
				3106	};
				3107
				3108	static struct kobj_type slab_ktype = {
				3109	.sysfs_ops = &slab_sysfs_ops,
				3110	};
				3111
				3112	static int uevent_filter(struct kset kset, struct kobject kobj)
				3113	{
				3114	struct kobj_type *ktype = get_ktype(kobj);
				3115
				3116	if (ktype == &slab_ktype)
				3117	return 1;
				3118	return 0;
				3119	}
				3120
				3121	static struct kset_uevent_ops slab_uevent_ops = {
				3122	.filter = uevent_filter,
				3123	};
				3124
				3125	decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
				3126
				3127	#define ID_STR_LENGTH 64
				3128
				3129	/* Create a unique string id for a slab cache:
				3130	* format
				3131	* :[flags-]size:[memory address of kmemcache]
				3132	*/
				3133	static char create_unique_id(struct kmem_cache s)
				3134	{
				3135	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
				3136	char *p = name;
				3137
				3138	BUG_ON(!name);
				3139
				3140	*p++ = ':';
				3141	/*
				3142	* First flags affecting slabcache operations. We will only
				3143	* get here for aliasable slabs so we do not need to support
				3144	* too many flags. The flags here must cover all flags that
				3145	* are matched during merging to guarantee that the id is
				3146	* unique.
				3147	*/
				3148	if (s->flags & SLAB_CACHE_DMA)
				3149	*p++ = 'd';
				3150	if (s->flags & SLAB_RECLAIM_ACCOUNT)
				3151	*p++ = 'a';
				3152	if (s->flags & SLAB_DEBUG_FREE)
				3153	*p++ = 'F';
				3154	if (p != name + 1)
				3155	*p++ = '-';
				3156	p += sprintf(p, "%07d", s->size);
				3157	BUG_ON(p > name + ID_STR_LENGTH - 1);
				3158	return name;
				3159	}
				3160
				3161	static int sysfs_slab_add(struct kmem_cache *s)
				3162	{
				3163	int err;
				3164	const char *name;
				3165	int unmergeable;
				3166
				3167	if (slab_state < SYSFS)
				3168	/* Defer until later */
				3169	return 0;
				3170
				3171	unmergeable = slab_unmergeable(s);
				3172	if (unmergeable) {
				3173	/*
				3174	* Slabcache can never be merged so we can use the name proper.
				3175	* This is typically the case for debug situations. In that
				3176	* case we can catch duplicate names easily.
				3177	*/
				3178	sysfs_remove_link(&slab_subsys.kset.kobj, s->name);
				3179	name = s->name;
				3180	} else {
				3181	/*
				3182	* Create a unique name for the slab as a target
				3183	* for the symlinks.
				3184	*/
				3185	name = create_unique_id(s);
				3186	}
				3187
				3188	kobj_set_kset_s(s, slab_subsys);
				3189	kobject_set_name(&s->kobj, name);
				3190	kobject_init(&s->kobj);
				3191	err = kobject_add(&s->kobj);
				3192	if (err)
				3193	return err;
				3194
				3195	err = sysfs_create_group(&s->kobj, &slab_attr_group);
				3196	if (err)
				3197	return err;
				3198	kobject_uevent(&s->kobj, KOBJ_ADD);
				3199	if (!unmergeable) {
				3200	/* Setup first alias */
				3201	sysfs_slab_alias(s, s->name);
				3202	kfree(name);
				3203	}
				3204	return 0;
				3205	}
				3206
				3207	static void sysfs_slab_remove(struct kmem_cache *s)
				3208	{
				3209	kobject_uevent(&s->kobj, KOBJ_REMOVE);
				3210	kobject_del(&s->kobj);
				3211	}
				3212
				3213	/*
				3214	* Need to buffer aliases during bootup until sysfs becomes
				3215	* available lest we loose that information.
				3216	*/
				3217	struct saved_alias {
				3218	struct kmem_cache *s;
				3219	const char *name;
				3220	struct saved_alias *next;
				3221	};
				3222
				3223	struct saved_alias *alias_list;
				3224
				3225	static int sysfs_slab_alias(struct kmem_cache s, const char name)
				3226	{
				3227	struct saved_alias *al;
				3228
				3229	if (slab_state == SYSFS) {
				3230	/*
				3231	* If we have a leftover link then remove it.
				3232	*/
				3233	sysfs_remove_link(&slab_subsys.kset.kobj, name);
				3234	return sysfs_create_link(&slab_subsys.kset.kobj,
				3235	&s->kobj, name);
				3236	}
				3237
				3238	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
				3239	if (!al)
				3240	return -ENOMEM;
				3241
				3242	al->s = s;
				3243	al->name = name;
				3244	al->next = alias_list;
				3245	alias_list = al;
				3246	return 0;
				3247	}
				3248
				3249	static int __init slab_sysfs_init(void)
				3250	{
				3251	int err;
				3252
				3253	err = subsystem_register(&slab_subsys);
				3254	if (err) {
				3255	printk(KERN_ERR "Cannot register slab subsystem.\n");
				3256	return -ENOSYS;
				3257	}
				3258
				3259	finish_bootstrap();
				3260
				3261	while (alias_list) {
				3262	struct saved_alias *al = alias_list;
				3263
				3264	alias_list = alias_list->next;
				3265	err = sysfs_slab_alias(al->s, al->name);
				3266	BUG_ON(err);
				3267	kfree(al);
				3268	}
				3269
				3270	resiliency_test();
				3271	return 0;
				3272	}
				3273
				3274	__initcall(slab_sysfs_init);
				3275	#else
				3276	__initcall(finish_bootstrap);
				3277	#endif