//===-- working_set.cpp ---------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners.
//
// This file contains working-set-specific code.
//===----------------------------------------------------------------------===//
| 14 | |
| 15 | #include "working_set.h" |
| 16 | #include "esan.h" |
| 17 | #include "esan_flags.h" |
| 18 | #include "esan_shadow.h" |
| 19 | |
// Shadow layout: each cache line of application memory is tracked by exactly
// one shadow byte.
// - Highest bit: set once the corresponding cache line has ever been
//   accessed.
// - Lowest bit: set when the corresponding cache line has been accessed
//   since the last sample.
// - Remaining bits: available either for a single working set snapshot
//   between two consecutive samples, or for an aggregate working set
//   snapshot over multiple sample periods (future work).
// Races on individual shadow bytes are tolerated.
using byte = unsigned char;
| 30 | |
| 31 | namespace __esan { |
| 32 | |
| 33 | // See the shadow byte layout description above. |
| 34 | static const u32 TotalWorkingSetBitIdx = 7; |
| 35 | static const u32 CurWorkingSetBitIdx = 0; |
| 36 | static const byte ShadowAccessedVal = |
| 37 | (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx); |
| 38 | |
| 39 | void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size, |
| 40 | bool IsWrite) { |
| 41 | if (Size == 0) |
| 42 | return; |
| 43 | SIZE_T I = 0; |
| 44 | uptr LineSize = getFlags()->cache_line_size; |
| 45 | // As Addr+Size could overflow at the top of a 32-bit address space, |
| 46 | // we avoid the simpler formula that rounds the start and end. |
| 47 | SIZE_T NumLines = Size / LineSize + |
| 48 | // Add any extra at the start or end adding on an extra line: |
| 49 | (LineSize - 1 + Addr % LineSize + Size % LineSize) / LineSize; |
| 50 | byte *Shadow = (byte *)appToShadow(Addr); |
| 51 | // Write shadow bytes until we're word-aligned. |
| 52 | while (I < NumLines && (uptr)Shadow % 4 != 0) { |
| 53 | if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal) |
| 54 | *Shadow |= ShadowAccessedVal; |
| 55 | ++Shadow; |
| 56 | ++I; |
| 57 | } |
| 58 | // Write whole shadow words at a time. |
| 59 | // Using a word-stride loop improves the runtime of a microbenchmark of |
| 60 | // memset calls by 10%. |
| 61 | u32 WordValue = ShadowAccessedVal | ShadowAccessedVal << 8 | |
| 62 | ShadowAccessedVal << 16 | ShadowAccessedVal << 24; |
| 63 | while (I + 4 <= NumLines) { |
| 64 | if ((*(u32*)Shadow & WordValue) != WordValue) |
| 65 | *(u32*)Shadow |= WordValue; |
| 66 | Shadow += 4; |
| 67 | I += 4; |
| 68 | } |
| 69 | // Write any trailing shadow bytes. |
| 70 | while (I < NumLines) { |
| 71 | if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal) |
| 72 | *Shadow |= ShadowAccessedVal; |
| 73 | ++Shadow; |
| 74 | ++I; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | void initializeWorkingSet() { |
| 79 | // The shadow mapping assumes 64 so this cannot be changed. |
| 80 | CHECK(getFlags()->cache_line_size == 64); |
| 81 | } |
| 82 | |
| 83 | int finalizeWorkingSet() { |
| 84 | // FIXME NYI: we need to add memory scanning to report the total lines |
| 85 | // touched, and later add sampling to get intermediate values. |
| 86 | Report("%s is not finished: nothing yet to report\n", SanitizerToolName); |
| 87 | return 0; |
| 88 | } |
| 89 | |
| 90 | } // namespace __esan |