Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Intel specific MCE features. |
| 3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 4 | * Copyright (C) 2008, 2009 Intel Corporation |
| 5 | * Author: Andi Kleen |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | #include <linux/init.h> |
| 9 | #include <linux/interrupt.h> |
| 10 | #include <linux/percpu.h> |
Alexey Dobriyan | d43c36d | 2009-10-07 17:09:06 +0400 | [diff] [blame] | 11 | #include <linux/sched.h> |
H. Peter Anvin | 1bf7b31 | 2009-06-17 08:31:15 -0700 | [diff] [blame] | 12 | #include <asm/apic.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 13 | #include <asm/processor.h> |
| 14 | #include <asm/msr.h> |
| 15 | #include <asm/mce.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 16 | |
/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */
| 23 | |
/*
 * Per-CPU bitmap of the MCA banks for which this CPU has enabled CMCI
 * (bits are set in cmci_discover() and cleared again in cmci_clear()).
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock serializes bank discovery so that parallel
 * discovery attempts cannot race against each other. cmci_clear()
 * takes it as well while it gives banks back.
 */
static DEFINE_SPINLOCK(cmci_discover_lock);

/* Threshold value programmed into MCx_CTL2 when CMCI is enabled for a bank. */
#define CMCI_THRESHOLD 1
| 33 | |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 34 | static int cmci_supported(int *banks) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 35 | { |
| 36 | u64 cap; |
| 37 | |
Hidetoshi Seto | 62fdac5 | 2009-06-11 16:06:07 +0900 | [diff] [blame] | 38 | if (mce_cmci_disabled || mce_ignore_ce) |
| 39 | return 0; |
| 40 | |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 41 | /* |
| 42 | * Vendor check is not strictly needed, but the initial |
| 43 | * initialization is vendor keyed and this |
| 44 | * makes sure none of the backdoors are entered otherwise. |
| 45 | */ |
| 46 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
| 47 | return 0; |
| 48 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
| 49 | return 0; |
| 50 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
| 51 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
| 52 | return !!(cap & MCG_CMCI_P); |
| 53 | } |
| 54 | |
| 55 | /* |
| 56 | * The interrupt handler. This is called on every event. |
| 57 | * Just call the poller directly to log any events. |
| 58 | * This could in theory increase the threshold under high load, |
| 59 | * but doesn't for now. |
| 60 | */ |
| 61 | static void intel_threshold_interrupt(void) |
| 62 | { |
| 63 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
Andi Kleen | 9ff36ee | 2009-05-27 21:56:58 +0200 | [diff] [blame] | 64 | mce_notify_irq(); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 65 | } |
| 66 | |
| 67 | static void print_update(char *type, int *hdr, int num) |
| 68 | { |
| 69 | if (*hdr == 0) |
| 70 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); |
| 71 | *hdr = 1; |
| 72 | printk(KERN_CONT " %s:%d", type, num); |
| 73 | } |
| 74 | |
| 75 | /* |
| 76 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks |
| 77 | * on this CPU. Use the algorithm recommended in the SDM to discover shared |
| 78 | * banks. |
| 79 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 80 | static void cmci_discover(int banks, int boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 81 | { |
| 82 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 83 | unsigned long flags; |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 84 | int hdr = 0; |
| 85 | int i; |
| 86 | |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 87 | spin_lock_irqsave(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 88 | for (i = 0; i < banks; i++) { |
| 89 | u64 val; |
| 90 | |
| 91 | if (test_bit(i, owned)) |
| 92 | continue; |
| 93 | |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 94 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 95 | |
| 96 | /* Already owned by someone else? */ |
| 97 | if (val & CMCI_EN) { |
Mike Travis | 10fb7f1 | 2010-03-05 13:10:36 -0600 | [diff] [blame] | 98 | if (test_and_clear_bit(i, owned) && !boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 99 | print_update("SHD", &hdr, i); |
| 100 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 101 | continue; |
| 102 | } |
| 103 | |
| 104 | val |= CMCI_EN | CMCI_THRESHOLD; |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 105 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 106 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 107 | |
| 108 | /* Did the enable bit stick? -- the bank supports CMCI */ |
| 109 | if (val & CMCI_EN) { |
Mike Travis | 10fb7f1 | 2010-03-05 13:10:36 -0600 | [diff] [blame] | 110 | if (!test_and_set_bit(i, owned) && !boot) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 111 | print_update("CMCI", &hdr, i); |
| 112 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 113 | } else { |
| 114 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
| 115 | } |
| 116 | } |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 117 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 118 | if (hdr) |
| 119 | printk(KERN_CONT "\n"); |
| 120 | } |
| 121 | |
| 122 | /* |
| 123 | * Just in case we missed an event during initialization check |
| 124 | * all the CMCI owned banks. |
| 125 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 126 | void cmci_recheck(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 127 | { |
| 128 | unsigned long flags; |
| 129 | int banks; |
| 130 | |
| 131 | if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) |
| 132 | return; |
| 133 | local_irq_save(flags); |
| 134 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
| 135 | local_irq_restore(flags); |
| 136 | } |
| 137 | |
| 138 | /* |
| 139 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
| 140 | * This allows other CPUs to claim the banks on rediscovery. |
| 141 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 142 | void cmci_clear(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 143 | { |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 144 | unsigned long flags; |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 145 | int i; |
| 146 | int banks; |
| 147 | u64 val; |
| 148 | |
| 149 | if (!cmci_supported(&banks)) |
| 150 | return; |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 151 | spin_lock_irqsave(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 152 | for (i = 0; i < banks; i++) { |
| 153 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
| 154 | continue; |
| 155 | /* Disable CMCI */ |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 156 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 157 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
Andi Kleen | a2d32bc | 2009-07-09 00:31:44 +0200 | [diff] [blame] | 158 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 159 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
| 160 | } |
Hidetoshi Seto | e529992 | 2009-05-08 17:28:40 +0900 | [diff] [blame] | 161 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 162 | } |
| 163 | |
| 164 | /* |
| 165 | * After a CPU went down cycle through all the others and rediscover |
| 166 | * Must run in process context. |
| 167 | */ |
H. Peter Anvin | df20e2e | 2009-02-24 13:19:02 -0800 | [diff] [blame] | 168 | void cmci_rediscover(int dying) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 169 | { |
| 170 | int banks; |
| 171 | int cpu; |
| 172 | cpumask_var_t old; |
| 173 | |
| 174 | if (!cmci_supported(&banks)) |
| 175 | return; |
| 176 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) |
| 177 | return; |
| 178 | cpumask_copy(old, ¤t->cpus_allowed); |
| 179 | |
Hidetoshi Seto | 61a021a | 2009-04-14 17:09:04 +0900 | [diff] [blame] | 180 | for_each_online_cpu(cpu) { |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 181 | if (cpu == dying) |
| 182 | continue; |
Rusty Russell | 4f06289 | 2009-03-13 14:49:54 +1030 | [diff] [blame] | 183 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 184 | continue; |
| 185 | /* Recheck banks in case CPUs don't all have the same */ |
| 186 | if (cmci_supported(&banks)) |
| 187 | cmci_discover(banks, 0); |
| 188 | } |
| 189 | |
| 190 | set_cpus_allowed_ptr(current, old); |
| 191 | free_cpumask_var(old); |
| 192 | } |
| 193 | |
/*
 * Turn CMCI back on for this CPU, for the case where taking a CPU down
 * failed. Reruns discovery with boot == 0; does nothing when
 * cmci_supported() reports no CMCI.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks, 0);
}
| 203 | |
Hidetoshi Seto | 514ec49 | 2009-03-16 17:07:33 +0900 | [diff] [blame] | 204 | static void intel_init_cmci(void) |
Andi Kleen | 88ccbed | 2009-02-12 13:49:36 +0100 | [diff] [blame] | 205 | { |
| 206 | int banks; |
| 207 | |
| 208 | if (!cmci_supported(&banks)) |
| 209 | return; |
| 210 | |
| 211 | mce_threshold_vector = intel_threshold_interrupt; |
| 212 | cmci_discover(banks, 1); |
| 213 | /* |
| 214 | * For CPU #0 this runs with still disabled APIC, but that's |
| 215 | * ok because only the vector is set up. We still do another |
| 216 | * check for the banks later for CPU #0 just to make sure |
| 217 | * to not miss any events. |
| 218 | */ |
| 219 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); |
| 220 | cmci_recheck(); |
| 221 | } |
| 222 | |
/*
 * Entry point for the Intel specific MCE features.
 *
 * @c: CPU description, passed on to intel_init_thermal()
 *
 * Runs the thermal initialization first, then the CMCI setup.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}