blob: 3ed82b337631ece539c13845af0fd5136a5930e8 [file] [log] [blame]
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +00001//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of XRay, a dynamic runtime instrumentation system.
11//
12// Implementation of ARM-specific routines (32-bit).
13//
14//===----------------------------------------------------------------------===//
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000015#include "sanitizer_common/sanitizer_common.h"
Diana Picus87d025f2016-11-16 09:32:23 +000016#include "xray_defs.h"
Diana Picus6b88e322016-12-22 07:35:56 +000017#include "xray_emulate_tsc.h"
Dean Michael Berris4ef1a692016-10-06 07:09:40 +000018#include "xray_interface_internal.h"
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000019#include <atomic>
20#include <cassert>
21
Serge Rogatch9bce1e72017-01-19 20:27:11 +000022extern "C" void __clear_cache(void* start, void* end);
23
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000024namespace __xray {
25
Diana Picus6b88e322016-12-22 07:35:56 +000026uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
27 // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
28 // not have a constant frequency like TSC on x86[_64]; it may go faster or
29 // slower depending on CPU's turbo or power saving modes. Furthermore, to
30 // read from CP15 on ARM a kernel modification or a driver is needed.
31 // We can not require this from users of compiler-rt.
32 // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
33 // To get the measurements per second, we scale this by the number of
34 // nanoseconds per second, pretending that the TSC frequency is 1GHz and
35 // one TSC tick is 1 nanosecond.
36 return NanosecondsPerSecond;
37}
38
// Fixed 32-bit ARM instruction encodings written into sleds by the runtime
// patching code.
enum class PatchOpcodes : uint32_t {
  // PUSH {r0, lr}
  PO_PushR0Lr = 0xE92D4001,
  // BLX ip
  PO_BlxIp = 0xE12FFF3C,
  // POP {r0, lr}
  PO_PopR0Lr = 0xE8BD4001,
  // B #20
  PO_B20 = 0xEA000005
};
46
47// 0xUUUUWXYZ -> 0x000W0XYZ
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000048inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000049 return (Value & 0xfff) | ((Value & 0xf000) << 4);
50}
51
52// 0xWXYZUUUU -> 0x000W0XYZ
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000053inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000054 return getMovwMask(Value >> 16);
55}
56
57// Writes the following instructions:
58// MOVW R<regNo>, #<lower 16 bits of the |Value|>
59// MOVT R<regNo>, #<higher 16 bits of the |Value|>
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000060inline static uint32_t *
61write32bitLoadReg(uint8_t regNo, uint32_t *Address,
62 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
Dean Michael Berris4ef1a692016-10-06 07:09:40 +000063 // This is a fatal error: we cannot just report it and continue execution.
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000064 assert(regNo <= 15 && "Register number must be 0 to 15.");
65 // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
Dean Michael Berris4ef1a692016-10-06 07:09:40 +000066 *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value));
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000067 Address++;
68 // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
Dean Michael Berris4ef1a692016-10-06 07:09:40 +000069 *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value));
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000070 return Address + 1;
71}
72
73// Writes the following instructions:
74// MOVW r0, #<lower 16 bits of the |Value|>
75// MOVT r0, #<higher 16 bits of the |Value|>
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000076inline static uint32_t *
Dean Michael Berrisea9042c2017-02-07 23:35:34 +000077write32bitLoadR0(uint32_t *Address,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000078 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000079 return write32bitLoadReg(0, Address, Value);
80}
81
82// Writes the following instructions:
83// MOVW ip, #<lower 16 bits of the |Value|>
84// MOVT ip, #<higher 16 bits of the |Value|>
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000085inline static uint32_t *
Dean Michael Berrisea9042c2017-02-07 23:35:34 +000086write32bitLoadIP(uint32_t *Address,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000087 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000088 return write32bitLoadReg(12, Address, Value);
89}
90
91inline static bool patchSled(const bool Enable, const uint32_t FuncId,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +000092 const XRaySledEntry &Sled,
93 void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +000094 // When |Enable| == true,
95 // We replace the following compile-time stub (sled):
96 //
97 // xray_sled_n:
98 // B #20
99 // 6 NOPs (24 bytes)
100 //
101 // With the following runtime patch:
102 //
103 // xray_sled_n:
104 // PUSH {r0, lr}
105 // MOVW r0, #<lower 16 bits of function ID>
106 // MOVT r0, #<higher 16 bits of function ID>
107 // MOVW ip, #<lower 16 bits of address of TracingHook>
108 // MOVT ip, #<higher 16 bits of address of TracingHook>
109 // BLX ip
110 // POP {r0, lr}
111 //
112 // Replacement of the first 4-byte instruction should be the last and atomic
113 // operation, so that the user code which reaches the sled concurrently
114 // either jumps over the whole sled, or executes the whole sled when the
115 // latter is ready.
116 //
117 // When |Enable|==false, we set back the first instruction in the sled to be
118 // B #20
119
120 uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
Serge Rogatch9bce1e72017-01-19 20:27:11 +0000121 uint32_t *CurAddress = FirstAddress + 1;
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000122 if (Enable) {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000123 CurAddress =
Dean Michael Berrisea9042c2017-02-07 23:35:34 +0000124 write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000125 CurAddress =
Dean Michael Berrisea9042c2017-02-07 23:35:34 +0000126 write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000127 *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
128 CurAddress++;
129 *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
Serge Rogatch9bce1e72017-01-19 20:27:11 +0000130 CurAddress++;
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000131 std::atomic_store_explicit(
132 reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
133 uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
134 } else {
135 std::atomic_store_explicit(
136 reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
137 uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
138 }
Serge Rogatch9bce1e72017-01-19 20:27:11 +0000139 __clear_cache(reinterpret_cast<char*>(FirstAddress),
140 reinterpret_cast<char*>(CurAddress));
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000141 return true;
142}
143
144bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +0000145 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000146 return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
147}
148
149bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +0000150 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000151 return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
152}
153
Dean Michael Berris1b09aae2016-10-13 23:56:54 +0000154bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
Dean Michael Berris4031e4b2016-11-16 01:01:13 +0000155 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
Serge Rogatchc4540b32017-01-26 16:18:13 +0000156 return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
Dean Michael Berris1b09aae2016-10-13 23:56:54 +0000157}
158
Dean Michael Berris607617b2017-02-02 07:51:21 +0000159// FIXME: Maybe implement this better?
160bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
161
Dean Michael Berrisd1617cd2016-09-20 14:35:57 +0000162} // namespace __xray