Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 1 | /* |
| 2 | * linux/mm/allocpercpu.c |
| 3 | * |
Christoph Lameter | cde5353 | 2008-07-04 09:59:22 -0700 | [diff] [blame] | 4 | * Separated from slab.c August 11, 2006 Christoph Lameter |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 5 | */ |
| 6 | #include <linux/mm.h> |
| 7 | #include <linux/module.h> |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 8 | #include <linux/bootmem.h> |
| 9 | #include <asm/sections.h> |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 10 | |
/*
 * Use the compile-time L1 cache line size unless something has already
 * defined cache_line_size().
 */
#if !defined(cache_line_size)
#define cache_line_size()	L1_CACHE_BYTES
#endif
| 14 | |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 15 | /** |
| 16 | * percpu_depopulate - depopulate per-cpu data for given cpu |
| 17 | * @__pdata: per-cpu data to depopulate |
| 18 | * @cpu: depopulate per-cpu data for this cpu |
| 19 | * |
| 20 | * Depopulating per-cpu data for a cpu going offline would be a typical |
| 21 | * use case. You need to register a cpu hotplug handler for that purpose. |
| 22 | */ |
Adrian Bunk | 9d8fddf | 2008-07-25 19:46:23 -0700 | [diff] [blame] | 23 | static void percpu_depopulate(void *__pdata, int cpu) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 24 | { |
| 25 | struct percpu_data *pdata = __percpu_disguise(__pdata); |
Alan Stern | a120586 | 2006-12-06 20:32:37 -0800 | [diff] [blame] | 26 | |
| 27 | kfree(pdata->ptrs[cpu]); |
| 28 | pdata->ptrs[cpu] = NULL; |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 29 | } |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 30 | |
| 31 | /** |
| 32 | * percpu_depopulate_mask - depopulate per-cpu data for some cpu's |
| 33 | * @__pdata: per-cpu data to depopulate |
| 34 | * @mask: depopulate per-cpu data for cpu's selected through mask bits |
| 35 | */ |
Stephen Rothwell | 5d6700e | 2009-04-06 15:08:29 +1000 | [diff] [blame] | 36 | static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 37 | { |
| 38 | int cpu; |
Mike Travis | 6d6a436 | 2008-05-12 21:21:13 +0200 | [diff] [blame] | 39 | for_each_cpu_mask_nr(cpu, *mask) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 40 | percpu_depopulate(__pdata, cpu); |
| 41 | } |
Adrian Bunk | 9d8fddf | 2008-07-25 19:46:23 -0700 | [diff] [blame] | 42 | |
| 43 | #define percpu_depopulate_mask(__pdata, mask) \ |
| 44 | __percpu_depopulate_mask((__pdata), &(mask)) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 45 | |
| 46 | /** |
| 47 | * percpu_populate - populate per-cpu data for given cpu |
| 48 | * @__pdata: per-cpu data to populate further |
| 49 | * @size: size of per-cpu object |
| 50 | * @gfp: may sleep or not etc. |
| 51 | * @cpu: populate per-data for this cpu |
| 52 | * |
| 53 | * Populating per-cpu data for a cpu coming online would be a typical |
| 54 | * use case. You need to register a cpu hotplug handler for that purpose. |
| 55 | * Per-cpu object is populated with zeroed buffer. |
| 56 | */ |
Adrian Bunk | 9d8fddf | 2008-07-25 19:46:23 -0700 | [diff] [blame] | 57 | static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 58 | { |
| 59 | struct percpu_data *pdata = __percpu_disguise(__pdata); |
| 60 | int node = cpu_to_node(cpu); |
| 61 | |
Eric Dumazet | be85279 | 2008-03-04 14:28:35 -0800 | [diff] [blame] | 62 | /* |
| 63 | * We should make sure each CPU gets private memory. |
| 64 | */ |
| 65 | size = roundup(size, cache_line_size()); |
| 66 | |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 67 | BUG_ON(pdata->ptrs[cpu]); |
Christoph Lameter | 94f6030 | 2007-07-17 04:03:29 -0700 | [diff] [blame] | 68 | if (node_online(node)) |
| 69 | pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); |
| 70 | else |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 71 | pdata->ptrs[cpu] = kzalloc(size, gfp); |
| 72 | return pdata->ptrs[cpu]; |
| 73 | } |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 74 | |
| 75 | /** |
| 76 | * percpu_populate_mask - populate per-cpu data for more cpu's |
| 77 | * @__pdata: per-cpu data to populate further |
| 78 | * @size: size of per-cpu object |
| 79 | * @gfp: may sleep or not etc. |
| 80 | * @mask: populate per-cpu data for cpu's selected through mask bits |
| 81 | * |
| 82 | * Per-cpu objects are populated with zeroed buffers. |
| 83 | */ |
Adrian Bunk | 9d8fddf | 2008-07-25 19:46:23 -0700 | [diff] [blame] | 84 | static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, |
| 85 | cpumask_t *mask) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 86 | { |
Mike Travis | d366f8c | 2008-04-04 18:11:12 -0700 | [diff] [blame] | 87 | cpumask_t populated; |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 88 | int cpu; |
| 89 | |
Mike Travis | d366f8c | 2008-04-04 18:11:12 -0700 | [diff] [blame] | 90 | cpus_clear(populated); |
Mike Travis | 6d6a436 | 2008-05-12 21:21:13 +0200 | [diff] [blame] | 91 | for_each_cpu_mask_nr(cpu, *mask) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 92 | if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { |
| 93 | __percpu_depopulate_mask(__pdata, &populated); |
| 94 | return -ENOMEM; |
| 95 | } else |
| 96 | cpu_set(cpu, populated); |
| 97 | return 0; |
| 98 | } |
Adrian Bunk | 9d8fddf | 2008-07-25 19:46:23 -0700 | [diff] [blame] | 99 | |
| 100 | #define percpu_populate_mask(__pdata, size, gfp, mask) \ |
| 101 | __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 102 | |
| 103 | /** |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 104 | * alloc_percpu - initial setup of per-cpu data |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 105 | * @size: size of per-cpu object |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 106 | * @align: alignment |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 107 | * |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 108 | * Allocate dynamic percpu area. Percpu objects are populated with |
| 109 | * zeroed buffers. |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 110 | */ |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 111 | void *__alloc_percpu(size_t size, size_t align) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 112 | { |
Eric Dumazet | be85279 | 2008-03-04 14:28:35 -0800 | [diff] [blame] | 113 | /* |
| 114 | * We allocate whole cache lines to avoid false sharing |
| 115 | */ |
| 116 | size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 117 | void *pdata = kzalloc(sz, GFP_KERNEL); |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 118 | void *__pdata = __percpu_disguise(pdata); |
| 119 | |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 120 | /* |
| 121 | * Can't easily make larger alignment work with kmalloc. WARN |
| 122 | * on it. Larger alignment should only be used for module |
| 123 | * percpu sections on SMP for which this path isn't used. |
| 124 | */ |
Tejun Heo | 60db564 | 2009-03-11 14:36:54 +0900 | [diff] [blame] | 125 | WARN_ON_ONCE(align > SMP_CACHE_BYTES); |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 126 | |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 127 | if (unlikely(!pdata)) |
| 128 | return NULL; |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 129 | if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, |
| 130 | &cpu_possible_map))) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 131 | return __pdata; |
| 132 | kfree(pdata); |
| 133 | return NULL; |
| 134 | } |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 135 | EXPORT_SYMBOL_GPL(__alloc_percpu); |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 136 | |
| 137 | /** |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 138 | * free_percpu - final cleanup of per-cpu data |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 139 | * @__pdata: object to clean up |
| 140 | * |
| 141 | * We simply clean up any per-cpu object left. No need for the client to |
| 142 | * track and specify through a bis mask which per-cpu objects are to free. |
| 143 | */ |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 144 | void free_percpu(void *__pdata) |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 145 | { |
Alan Stern | a120586 | 2006-12-06 20:32:37 -0800 | [diff] [blame] | 146 | if (unlikely(!__pdata)) |
| 147 | return; |
Rusty Russell | aa85ea5 | 2009-03-30 22:05:15 -0600 | [diff] [blame] | 148 | __percpu_depopulate_mask(__pdata, cpu_possible_mask); |
Christoph Lameter | d00bcc9 | 2006-09-25 23:31:50 -0700 | [diff] [blame] | 149 | kfree(__percpu_disguise(__pdata)); |
| 150 | } |
Tejun Heo | f2a8205 | 2009-02-20 16:29:08 +0900 | [diff] [blame] | 151 | EXPORT_SYMBOL_GPL(free_percpu); |
Tejun Heo | e74e396 | 2009-03-30 19:07:44 +0900 | [diff] [blame] | 152 | |
| 153 | /* |
| 154 | * Generic percpu area setup. |
| 155 | */ |
| 156 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA |
| 157 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; |
| 158 | |
| 159 | EXPORT_SYMBOL(__per_cpu_offset); |
| 160 | |
| 161 | void __init setup_per_cpu_areas(void) |
| 162 | { |
| 163 | unsigned long size, i; |
| 164 | char *ptr; |
| 165 | unsigned long nr_possible_cpus = num_possible_cpus(); |
| 166 | |
| 167 | /* Copy section for each CPU (we discard the original) */ |
| 168 | size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); |
| 169 | ptr = alloc_bootmem_pages(size * nr_possible_cpus); |
| 170 | |
| 171 | for_each_possible_cpu(i) { |
| 172 | __per_cpu_offset[i] = ptr - __per_cpu_start; |
| 173 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
| 174 | ptr += size; |
| 175 | } |
| 176 | } |
| 177 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ |