Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 1 | /* |
John Gregor | 87427da | 2007-06-11 10:21:14 -0700 | [diff] [blame] | 2 | * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 3 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. |
| 4 | * |
| 5 | * This software is available to you under a choice of one of two |
| 6 | * licenses. You may choose to be licensed under the terms of the GNU |
| 7 | * General Public License (GPL) Version 2, available from the file |
| 8 | * COPYING in the main directory of this source tree, or the |
| 9 | * OpenIB.org BSD license below: |
| 10 | * |
| 11 | * Redistribution and use in source and binary forms, with or |
| 12 | * without modification, are permitted provided that the following |
| 13 | * conditions are met: |
| 14 | * |
| 15 | * - Redistributions of source code must retain the above |
| 16 | * copyright notice, this list of conditions and the following |
| 17 | * disclaimer. |
| 18 | * |
| 19 | * - Redistributions in binary form must reproduce the above |
| 20 | * copyright notice, this list of conditions and the following |
| 21 | * disclaimer in the documentation and/or other materials |
| 22 | * provided with the distribution. |
| 23 | * |
| 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 31 | * SOFTWARE. |
| 32 | */ |
| 33 | |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 34 | #include "ipath_kernel.h" |
| 35 | |
/*
 * Driver statistics, defined here with external linkage.  Within this
 * file the sps_etidfull, sps_hdrqfull and sps_port0pkts fields are read
 * by ipath_qcheck() to report deltas since its previous run.
 */
| 36 | struct infinipath_stats ipath_stats; |
| 37 | |
| 38 | /** |
| 39 | * ipath_snap_cntr - snapshot a chip counter |
| 40 | * @dd: the infinipath device |
| 41 | * @creg: the counter to snapshot |
| 42 | * |
| 43 | * called from add_timer and user counter read calls, to deal with |
| 44 | * counters that wrap in "human time". The words sent and received, and |
| 45 | * the packets sent and received are all that we worry about. For now, |
| 46 | * at least, we don't worry about error counters, because if they wrap |
| 47 | * that quickly, we probably don't care. We may eventually just make this |
| 48 | * handle all the counters. word counters can wrap in about 20 seconds |
| 49 | * of full bandwidth traffic, packet counters in a few hours. |
| 50 | */ |
| 51 | |
| 52 | u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) |
| 53 | { |
| 54 | u32 val, reg64 = 0; |
| 55 | u64 val64; |
| 56 | unsigned long t0, t1; |
| 57 | u64 ret; |
| 58 | |
| 59 | t0 = jiffies; |
| 60 | /* If fast increment counters are only 32 bits, snapshot them, |
| 61 | * and maintain them as 64bit values in the driver */ |
| 62 | if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) && |
| 63 | (creg == dd->ipath_cregs->cr_wordsendcnt || |
| 64 | creg == dd->ipath_cregs->cr_wordrcvcnt || |
| 65 | creg == dd->ipath_cregs->cr_pktsendcnt || |
| 66 | creg == dd->ipath_cregs->cr_pktrcvcnt)) { |
| 67 | val64 = ipath_read_creg(dd, creg); |
| 68 | val = val64 == ~0ULL ? ~0U : 0; |
| 69 | reg64 = 1; |
| 70 | } else /* val64 just to keep gcc quiet... */ |
| 71 | val64 = val = ipath_read_creg32(dd, creg); |
| 72 | /* |
| 73 | * See if a second has passed. This is just a way to detect things |
| 74 | * that are quite broken. Normally this should take just a few |
| 75 | * cycles (the check is for long enough that we don't care if we get |
| 76 | * pre-empted.) An Opteron HT O read timeout is 4 seconds with |
| 77 | * normal NB values |
| 78 | */ |
| 79 | t1 = jiffies; |
| 80 | if (time_before(t0 + HZ, t1) && val == -1) { |
| 81 | ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n", |
| 82 | creg); |
| 83 | ret = 0ULL; |
| 84 | goto bail; |
| 85 | } |
| 86 | if (reg64) { |
| 87 | ret = val64; |
| 88 | goto bail; |
| 89 | } |
| 90 | |
| 91 | if (creg == dd->ipath_cregs->cr_wordsendcnt) { |
| 92 | if (val != dd->ipath_lastsword) { |
| 93 | dd->ipath_sword += val - dd->ipath_lastsword; |
| 94 | dd->ipath_lastsword = val; |
| 95 | } |
| 96 | val64 = dd->ipath_sword; |
| 97 | } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { |
| 98 | if (val != dd->ipath_lastrword) { |
| 99 | dd->ipath_rword += val - dd->ipath_lastrword; |
| 100 | dd->ipath_lastrword = val; |
| 101 | } |
| 102 | val64 = dd->ipath_rword; |
| 103 | } else if (creg == dd->ipath_cregs->cr_pktsendcnt) { |
| 104 | if (val != dd->ipath_lastspkts) { |
| 105 | dd->ipath_spkts += val - dd->ipath_lastspkts; |
| 106 | dd->ipath_lastspkts = val; |
| 107 | } |
| 108 | val64 = dd->ipath_spkts; |
| 109 | } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) { |
| 110 | if (val != dd->ipath_lastrpkts) { |
| 111 | dd->ipath_rpkts += val - dd->ipath_lastrpkts; |
| 112 | dd->ipath_lastrpkts = val; |
| 113 | } |
| 114 | val64 = dd->ipath_rpkts; |
| 115 | } else |
| 116 | val64 = (u64) val; |
| 117 | |
| 118 | ret = val64; |
| 119 | |
| 120 | bail: |
| 121 | return ret; |
| 122 | } |
| 123 | |
| 124 | /** |
| 125 | * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports |
| 126 | * @dd: the infinipath device |
| 127 | * |
| 128 | * print the delta of egrfull/hdrqfull errors for kernel ports no more than |
| 129 | * every 5 seconds. User processes are printed at close, but kernel doesn't |
| 130 | * close, so... Separate routine so may call from other places someday, and |
| 131 | * so function name when printed by _IPATH_INFO is meaningfull |
| 132 | */ |
| 133 | static void ipath_qcheck(struct ipath_devdata *dd) |
| 134 | { |
| 135 | static u64 last_tot_hdrqfull; |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 136 | struct ipath_portdata *pd = dd->ipath_pd[0]; |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 137 | size_t blen = 0; |
| 138 | char buf[128]; |
| 139 | |
| 140 | *buf = 0; |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 141 | if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) { |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 142 | blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 143 | pd->port_hdrqfull - |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 144 | dd->ipath_p0_hdrqfull); |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 145 | dd->ipath_p0_hdrqfull = pd->port_hdrqfull; |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 146 | } |
| 147 | if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { |
| 148 | blen += snprintf(buf + blen, sizeof buf - blen, |
| 149 | "%srcvegrfull %llu", |
| 150 | blen ? ", " : "", |
| 151 | (unsigned long long) |
| 152 | (ipath_stats.sps_etidfull - |
| 153 | dd->ipath_last_tidfull)); |
| 154 | dd->ipath_last_tidfull = ipath_stats.sps_etidfull; |
| 155 | } |
| 156 | |
| 157 | /* |
| 158 | * this is actually the number of hdrq full interrupts, not actual |
| 159 | * events, but at the moment that's mostly what I'm interested in. |
| 160 | * Actual count, etc. is in the counters, if needed. For production |
| 161 | * users this won't ordinarily be printed. |
| 162 | */ |
| 163 | |
| 164 | if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) && |
| 165 | ipath_stats.sps_hdrqfull != last_tot_hdrqfull) { |
| 166 | blen += snprintf(buf + blen, sizeof buf - blen, |
| 167 | "%shdrqfull %llu (all ports)", |
| 168 | blen ? ", " : "", |
| 169 | (unsigned long long) |
| 170 | (ipath_stats.sps_hdrqfull - |
| 171 | last_tot_hdrqfull)); |
| 172 | last_tot_hdrqfull = ipath_stats.sps_hdrqfull; |
| 173 | } |
| 174 | if (blen) |
| 175 | ipath_dbg("%s\n", buf); |
| 176 | |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 177 | if (pd->port_head != (u32) |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 178 | le64_to_cpu(*dd->ipath_hdrqtailptr)) { |
| 179 | if (dd->ipath_lastport0rcv_cnt == |
| 180 | ipath_stats.sps_port0pkts) { |
| 181 | ipath_cdbg(PKT, "missing rcv interrupts? " |
| 182 | "port0 hd=%llx tl=%x; port0pkts %llx\n", |
| 183 | (unsigned long long) |
| 184 | le64_to_cpu(*dd->ipath_hdrqtailptr), |
Ralph Campbell | c59a80a | 2007-12-20 02:43:23 -0800 | [diff] [blame^] | 185 | pd->port_head, |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 186 | (unsigned long long) |
| 187 | ipath_stats.sps_port0pkts); |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 188 | } |
| 189 | dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; |
| 190 | } |
| 191 | } |
| 192 | |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 193 | static void ipath_chk_errormask(struct ipath_devdata *dd) |
| 194 | { |
| 195 | static u32 fixed; |
| 196 | u32 ctrl; |
| 197 | unsigned long errormask; |
| 198 | unsigned long hwerrs; |
| 199 | |
| 200 | if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) |
| 201 | return; |
| 202 | |
| 203 | errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); |
| 204 | |
| 205 | if (errormask == dd->ipath_errormask) |
| 206 | return; |
| 207 | fixed++; |
| 208 | |
| 209 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); |
| 210 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); |
| 211 | |
| 212 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
| 213 | dd->ipath_errormask); |
| 214 | |
| 215 | if ((hwerrs & dd->ipath_hwerrmask) || |
| 216 | (ctrl & INFINIPATH_C_FREEZEMODE)) { |
| 217 | /* force re-interrupt of pending events, just in case */ |
| 218 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); |
| 219 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); |
| 220 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); |
| 221 | dev_info(&dd->pcidev->dev, |
| 222 | "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", |
| 223 | fixed, errormask, (unsigned long)dd->ipath_errormask, |
| 224 | ctrl, hwerrs); |
| 225 | } else |
| 226 | ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", |
| 227 | fixed, errormask, |
| 228 | (unsigned long)dd->ipath_errormask); |
| 229 | } |
| 230 | |
| 231 | |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 232 | /** |
| 233 | * ipath_get_faststats - get word counters from chip before they overflow |
| 234 | * @opaque - contains a pointer to the infinipath device ipath_devdata |
| 235 | * |
| 236 | * called from add_timer |
| 237 | */ |
| 238 | void ipath_get_faststats(unsigned long opaque) |
| 239 | { |
| 240 | struct ipath_devdata *dd = (struct ipath_devdata *) opaque; |
| 241 | u32 val; |
| 242 | static unsigned cnt; |
Michael Albaugh | aecd3b5 | 2007-05-17 07:26:28 -0700 | [diff] [blame] | 243 | unsigned long flags; |
Michael Albaugh | 192594d | 2007-10-02 13:26:45 -0700 | [diff] [blame] | 244 | u64 traffic_wds; |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 245 | |
| 246 | /* |
| 247 | * don't access the chip while running diags, or memory diags can |
| 248 | * fail |
| 249 | */ |
Michael Albaugh | 27b044a | 2007-03-15 14:45:08 -0700 | [diff] [blame] | 250 | if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 251 | ipath_diag_inuse) |
| 252 | /* but re-arm the timer, for diags case; won't hurt other */ |
| 253 | goto done; |
| 254 | |
Michael Albaugh | aecd3b5 | 2007-05-17 07:26:28 -0700 | [diff] [blame] | 255 | /* |
| 256 | * We now try to maintain a "active timer", based on traffic |
| 257 | * exceeding a threshold, so we need to check the word-counts |
| 258 | * even if they are 64-bit. |
| 259 | */ |
Michael Albaugh | 192594d | 2007-10-02 13:26:45 -0700 | [diff] [blame] | 260 | traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) + |
| 261 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); |
Michael Albaugh | aecd3b5 | 2007-05-17 07:26:28 -0700 | [diff] [blame] | 262 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); |
Michael Albaugh | 192594d | 2007-10-02 13:26:45 -0700 | [diff] [blame] | 263 | traffic_wds -= dd->ipath_traffic_wds; |
| 264 | dd->ipath_traffic_wds += traffic_wds; |
| 265 | if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) |
Michael Albaugh | aecd3b5 | 2007-05-17 07:26:28 -0700 | [diff] [blame] | 266 | atomic_add(5, &dd->ipath_active_time); /* S/B #define */ |
Michael Albaugh | aecd3b5 | 2007-05-17 07:26:28 -0700 | [diff] [blame] | 267 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); |
| 268 | |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 269 | if (dd->ipath_flags & IPATH_32BITCOUNTERS) { |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 270 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); |
| 271 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); |
| 272 | } |
| 273 | |
| 274 | ipath_qcheck(dd); |
| 275 | |
| 276 | /* |
| 277 | * deal with repeat error suppression. Doesn't really matter if |
| 278 | * last error was almost a full interval ago, or just a few usecs |
| 279 | * ago; still won't get more than 2 per interval. We may want |
| 280 | * longer intervals for this eventually, could do with mod, counter |
| 281 | * or separate timer. Also see code in ipath_handle_errors() and |
| 282 | * ipath_handle_hwerrors(). |
| 283 | */ |
| 284 | |
| 285 | if (dd->ipath_lasterror) |
| 286 | dd->ipath_lasterror = 0; |
| 287 | if (dd->ipath_lasthwerror) |
| 288 | dd->ipath_lasthwerror = 0; |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 289 | if (dd->ipath_maskederrs |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 290 | && time_after(jiffies, dd->ipath_unmasktime)) { |
| 291 | char ebuf[256]; |
Bryan O'Sullivan | 8ec1077 | 2007-03-15 14:44:55 -0700 | [diff] [blame] | 292 | int iserr; |
| 293 | iserr = ipath_decode_err(ebuf, sizeof ebuf, |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 294 | dd->ipath_maskederrs); |
| 295 | if (dd->ipath_maskederrs & |
Bryan O'Sullivan | 8ec1077 | 2007-03-15 14:44:55 -0700 | [diff] [blame] | 296 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | |
| 297 | INFINIPATH_E_PKTERRS )) |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 298 | ipath_dev_err(dd, "Re-enabling masked errors " |
| 299 | "(%s)\n", ebuf); |
| 300 | else { |
| 301 | /* |
| 302 | * rcvegrfull and rcvhdrqfull are "normal", for some |
| 303 | * types of processes (mostly benchmarks) that send |
| 304 | * huge numbers of messages, while not processing |
| 305 | * them. So only complain about these at debug |
| 306 | * level. |
| 307 | */ |
Bryan O'Sullivan | 8ec1077 | 2007-03-15 14:44:55 -0700 | [diff] [blame] | 308 | if (iserr) |
| 309 | ipath_dbg("Re-enabling queue full errors (%s)\n", |
| 310 | ebuf); |
| 311 | else |
| 312 | ipath_cdbg(ERRPKT, "Re-enabling packet" |
| 313 | " problem interrupt (%s)\n", ebuf); |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 314 | } |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 315 | |
| 316 | /* re-enable masked errors */ |
| 317 | dd->ipath_errormask |= dd->ipath_maskederrs; |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 318 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 319 | dd->ipath_errormask); |
| 320 | dd->ipath_maskederrs = 0; |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 321 | } |
| 322 | |
| 323 | /* limit qfull messages to ~one per minute per port */ |
| 324 | if ((++cnt & 0x10)) { |
| 325 | for (val = dd->ipath_cfgports - 1; ((int)val) >= 0; |
| 326 | val--) { |
| 327 | if (dd->ipath_lastegrheads[val] != -1) |
| 328 | dd->ipath_lastegrheads[val] = -1; |
| 329 | if (dd->ipath_lastrcvhdrqtails[val] != -1) |
| 330 | dd->ipath_lastrcvhdrqtails[val] = -1; |
| 331 | } |
| 332 | } |
| 333 | |
Dave Olson | 78d1e02 | 2007-07-20 14:41:26 -0700 | [diff] [blame] | 334 | ipath_chk_errormask(dd); |
Bryan O'Sullivan | 108ecf0 | 2006-03-29 15:23:29 -0800 | [diff] [blame] | 335 | done: |
| 336 | mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); |
| 337 | } |