1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10///
11/// Provide a pass which mitigates speculative execution attacks which operate
12/// by speculating incorrectly past some predicate (a type check, bounds check,
13/// or other condition) to reach a load with invalid inputs and leak the data
14/// accessed by that load using a side channel out of the speculative domain.
15///
16/// For details on the attacks, see the first variant in both the Project Zero
17/// writeup and the Spectre paper:
18/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
19/// https://spectreattack.com/spectre.pdf
20///
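/// As a purely illustrative sketch (the names here are invented for
/// exposition), the classic gadget this pass mitigates looks roughly like:
///
///   if (idx < array_len) {          // Predicted taken even when idx is
///                                   // out of bounds.
///     char v = array[idx];          // Speculative out-of-bounds load.
///     tmp = probe_table[v * 4096];  // Dependent load leaks `v` through the
///                                   // cache side channel.
///   }
///
/// The hardening below traces a "predicate state" value that becomes all-ones
/// whenever a branch is mispredicted, and uses it to poison either the address
/// registers feeding a load or the loaded value itself, so that misspeculated
/// loads cannot feed a dependent, leaking access.
///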
21//===----------------------------------------------------------------------===//
22
23#include "X86.h"
24#include "X86InstrBuilder.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/ScopeExit.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/SparseBitVector.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineConstantPool.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineFunctionPass.h"
40#include "llvm/CodeGen/MachineInstr.h"
41#include "llvm/CodeGen/MachineInstrBuilder.h"
42#include "llvm/CodeGen/MachineModuleInfo.h"
43#include "llvm/CodeGen/MachineOperand.h"
44#include "llvm/CodeGen/MachineRegisterInfo.h"
45#include "llvm/CodeGen/MachineSSAUpdater.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSchedule.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/MC/MCSchedule.h"
52#include "llvm/Pass.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/raw_ostream.h"
56#include <algorithm>
57#include <cassert>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define PASS_KEY "x86-speculative-load-hardening"
64#define DEBUG_TYPE PASS_KEY
65
66STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
67STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
68STATISTIC(NumAddrRegsHardened,
69 "Number of address mode used registers hardaned");
70STATISTIC(NumPostLoadRegsHardened,
71 "Number of post-load register values hardened");
72STATISTIC(NumInstsInserted, "Number of instructions inserted");
73STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
74
75static cl::opt<bool> HardenEdgesWithLFENCE(
76 PASS_KEY "-lfence",
77 cl::desc(
78 "Use LFENCE along each conditional edge to harden against speculative "
79 "loads rather than conditional movs and poisoned pointers."),
80 cl::init(false), cl::Hidden);
81
82static cl::opt<bool> EnablePostLoadHardening(
83 PASS_KEY "-post-load",
84 cl::desc("Harden the value loaded *after* it is loaded by "
85 "flushing the loaded bits to 1. This is hard to do "
86 "in general but can be done easily for GPRs."),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool> FenceCallAndRet(
90 PASS_KEY "-fence-call-and-ret",
91 cl::desc("Use a full speculation fence to harden both call and ret edges "
92 "rather than a lighter weight mitigation."),
93 cl::init(false), cl::Hidden);
94
95static cl::opt<bool> HardenInterprocedurally(
96 PASS_KEY "-ip",
97 cl::desc("Harden interprocedurally by passing our state in and out of "
98 "functions in the high bits of the stack pointer."),
99 cl::init(true), cl::Hidden);
100
101static cl::opt<bool>
102 HardenLoads(PASS_KEY "-loads",
103 cl::desc("Sanitize loads from memory. When disable, no "
104 "significant security is provided."),
105 cl::init(true), cl::Hidden);
106
107namespace llvm {
108
109void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
110
111} // end namespace llvm
112
113namespace {
114
115class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
116public:
117 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
118 initializeX86SpeculativeLoadHardeningPassPass(
119 *PassRegistry::getPassRegistry());
120 }
121
122 StringRef getPassName() const override {
123 return "X86 speculative load hardening";
124 }
125 bool runOnMachineFunction(MachineFunction &MF) override;
126 void getAnalysisUsage(AnalysisUsage &AU) const override;
127
128 /// Pass identification, replacement for typeid.
129 static char ID;
130
131private:
132 /// The information about a block's conditional terminators needed to trace
133 /// our predicate state through the exiting edges.
134 struct BlockCondInfo {
135 MachineBasicBlock *MBB;
136
137 // We mostly have one conditional branch, and in extremely rare cases have
138 // two. Three or more are so rare as to be unimportant for compile time.
139 SmallVector<MachineInstr *, 2> CondBrs;
140
141 MachineInstr *UncondBr;
142 };
143
144 const X86Subtarget *Subtarget;
145 MachineRegisterInfo *MRI;
146 const X86InstrInfo *TII;
147 const TargetRegisterInfo *TRI;
148 const TargetRegisterClass *PredStateRC;
149
150 void hardenEdgesWithLFENCE(MachineFunction &MF);
151
152 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
153
154 void checkAllLoads(MachineFunction &MF, MachineSSAUpdater &PredStateSSA);
155
156 unsigned saveEFLAGS(MachineBasicBlock &MBB,
157 MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
158 void restoreEFLAGS(MachineBasicBlock &MBB,
159 MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
160 unsigned OFReg);
161
162 void mergePredStateIntoSP(MachineBasicBlock &MBB,
163 MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
164 unsigned PredStateReg);
165 unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
166 MachineBasicBlock::iterator InsertPt,
167 DebugLoc Loc);
168
169 void
170 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
171 MachineOperand &IndexMO, MachineSSAUpdater &PredStateSSA,
172 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
173 MachineInstr *
174 sinkPostLoadHardenedInst(MachineInstr &MI,
175 SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
176 void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
177 void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
178 void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
179};
180
181} // end anonymous namespace
182
183char X86SpeculativeLoadHardeningPass::ID = 0;
184
185void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
186 AnalysisUsage &AU) const {
187 MachineFunctionPass::getAnalysisUsage(AU);
188}
189
190static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
191 MachineBasicBlock &Succ, int SuccCount,
192 MachineInstr *Br, MachineInstr *&UncondBr,
193 const X86InstrInfo &TII) {
194 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
195
196 MachineFunction &MF = *MBB.getParent();
197
198 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
199
200 // We have to insert the new block immediately after the current one as we
201 // don't know what layout-successor relationships the successor has and we
202 // may not be able to (and generally don't want to) try to fix those up.
203 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
204
205 // Update the branch instruction if necessary.
206 if (Br) {
207 assert(Br->getOperand(0).getMBB() == &Succ &&
208 "Didn't start with the right target!");
209 Br->getOperand(0).setMBB(&NewMBB);
210
211 // If this successor was reached through a branch rather than fallthrough,
212 // we might have *broken* fallthrough and so need to inject a new
213 // unconditional branch.
214 if (!UncondBr) {
215 MachineBasicBlock &OldLayoutSucc =
216 *std::next(MachineFunction::iterator(&NewMBB));
217 assert(MBB.isSuccessor(&OldLayoutSucc) &&
218 "Without an unconditional branch, the old layout successor should "
219 "be an actual successor!");
220 auto BrBuilder =
221 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
222 // Update the unconditional branch now that we've added one.
223 UncondBr = &*BrBuilder;
224 }
225
226 // Insert unconditional "jump Succ" instruction in the new block if
227 // necessary.
228 if (!NewMBB.isLayoutSuccessor(&Succ)) {
229 SmallVector<MachineOperand, 4> Cond;
230 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
231 }
232 } else {
233 assert(!UncondBr &&
234 "Cannot have a branchless successor and an unconditional branch!");
235 assert(NewMBB.isLayoutSuccessor(&Succ) &&
236 "A non-branch successor must have been a layout successor before "
237 "and now is a layout successor of the new block.");
238 }
239
240 // If this is the only edge to the successor, we can just replace it in the
241 // CFG. Otherwise we need to add a new entry in the CFG for the new
242 // successor.
243 if (SuccCount == 1) {
244 MBB.replaceSuccessor(&Succ, &NewMBB);
245 } else {
246 MBB.splitSuccessor(&Succ, &NewMBB);
247 }
248
249 // Hook up the edge from the new basic block to the old successor in the CFG.
250 NewMBB.addSuccessor(&Succ);
251
252 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
253 for (MachineInstr &MI : Succ) {
254 if (!MI.isPHI())
255 break;
256 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
257 OpIdx += 2) {
258 MachineOperand &OpV = MI.getOperand(OpIdx);
259 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
260 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
261 if (OpMBB.getMBB() != &MBB)
262 continue;
263
264 // If this is the last edge to the successor, just replace MBB in the PHI.
265 if (SuccCount == 1) {
266 OpMBB.setMBB(&NewMBB);
267 break;
268 }
269
270 // Otherwise, append a new pair of operands for the new incoming edge.
271 MI.addOperand(MF, OpV);
272 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
273 break;
274 }
275 }
276
277 // Inherit live-ins from the successor
278 for (auto &LI : Succ.liveins())
279 NewMBB.addLiveIn(LI);
280
281 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
282 << Succ.getName() << "'.\n");
283 return NewMBB;
284}
285
286bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
287 MachineFunction &MF) {
288 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
289 << " **********\n");
290
291 Subtarget = &MF.getSubtarget<X86Subtarget>();
292 MRI = &MF.getRegInfo();
293 TII = Subtarget->getInstrInfo();
294 TRI = Subtarget->getRegisterInfo();
295 // FIXME: Support for 32-bit.
296 PredStateRC = &X86::GR64_NOSPRegClass;
297
298 if (MF.begin() == MF.end())
299 // Nothing to do for a degenerate empty function...
300 return false;
301
302 // We support an alternative hardening technique based on a debug flag.
303 if (HardenEdgesWithLFENCE) {
304 hardenEdgesWithLFENCE(MF);
305 return true;
306 }
307
308 // Create a dummy debug loc to use for all the generated code here.
309 DebugLoc Loc;
310
311 MachineBasicBlock &Entry = *MF.begin();
312 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
313
314 // Do a quick scan to see if we have any checkable loads.
315 bool HasCheckableLoad = false;
316 for (MachineBasicBlock &MBB : MF) {
317 for (MachineInstr &MI : MBB) {
318 // Stop searching blocks at an LFENCE.
319 if (MI.getOpcode() == X86::LFENCE)
320 break;
321
322 // Looking for loads only.
323 if (!MI.mayLoad())
324 continue;
325
326 // An MFENCE is modeled as a load but doesn't require hardening.
327 if (MI.getOpcode() == X86::MFENCE)
328 continue;
329
330 HasCheckableLoad = true;
331 break;
332 }
333 if (HasCheckableLoad)
334 break;
335 }
336
337 // See if we have any conditional branching blocks that we will need to trace
338 // predicate state through.
339 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
340
341 // If we have no interesting conditions or loads, nothing to do here.
342 if (!HasCheckableLoad && Infos.empty())
343 return true;
344
345 unsigned PredStateReg;
346 unsigned PredStateSizeInBytes = TRI->getRegSizeInBits(*PredStateRC) / 8;
347
348 // The poison value is required to be an all-ones value for many aspects of
349 // this mitigation.
350 const int PoisonVal = -1;
351 unsigned PoisonReg = MRI->createVirtualRegister(PredStateRC);
352 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PoisonReg)
353 .addImm(PoisonVal);
354 ++NumInstsInserted;
355
356 // If we have loads being hardened and we've asked for call and ret edges to
357 // get a full fence-based mitigation, inject that fence.
358 if (HasCheckableLoad && FenceCallAndRet) {
359 // We need to insert an LFENCE at the start of the function to suspend any
360 // incoming misspeculation from the caller. This helps two-fold: the caller
361 // may not have been protected as this code has been, and this code then
362 // doesn't need to take any specific action to protect across calls.
363 // FIXME: We could skip this for functions which unconditionally return
364 // a constant.
365 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
366 ++NumInstsInserted;
367 ++NumLFENCEsInserted;
368 }
369
370 // If we have no conditionals to protect in blocks, then all we needed to do
371 // was protect the entry and so we're done.
372 if (Infos.empty())
373 // We may have changed the function's code at this point to insert fences.
374 return true;
375
376 // Establish the initial predicate state for the entry block.
377 if (HardenInterprocedurally && !FenceCallAndRet) {
378 // Set up the predicate state by extracting it from the incoming stack
379 // pointer so we pick up any misspeculation in our caller.
380 PredStateReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
381 } else {
382 // Otherwise, just build the predicate state itself by zeroing a register
383 // as we don't need any initial state.
384 PredStateReg = MRI->createVirtualRegister(PredStateRC);
385 unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
386 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
387 PredStateSubReg);
388 ++NumInstsInserted;
389 MachineOperand *ZeroEFLAGSDefOp =
390 ZeroI->findRegisterDefOperand(X86::EFLAGS);
391 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
392 "Must have an implicit def of EFLAGS!");
393 ZeroEFLAGSDefOp->setIsDead(true);
394 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
395 PredStateReg)
396 .addImm(0)
397 .addReg(PredStateSubReg)
398 .addImm(X86::sub_32bit);
399 }
400
401 // We're going to need to trace predicate state throughout the function's
402 // CFG. Prepare for this by setting up our initial state of PHIs with unique
403 // predecessor entries and all the initial predicate state.
404
405 // FIXME: It's really frustrating that we have to do this, but SSA-form in
406 // MIR isn't what you might expect. We may have multiple entries in PHI nodes
407 // for a single predecessor. This makes CFG-updating extremely complex, so
408 // here we simplify all PHI nodes to a model even simpler than the IR's
409 // model: exactly one entry per predecessor, regardless of how many edges
410 // there are.
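  //
  // As a rough illustration (MIR syntax approximated), a PHI such as:
  //
  //   %v = PHI %a, %bb.1, %b, %bb.2, %b, %bb.2
  //
  // where %bb.2 appears twice because more than one edge reaches this block
  // from that predecessor, is collapsed below so that each predecessor keeps
  // exactly one entry:
  //
  //   %v = PHI %a, %bb.1, %b, %bb.2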
411 SmallPtrSet<MachineBasicBlock *, 4> Preds;
412 SmallVector<int, 4> DupIndices;
413 for (auto &MBB : MF)
414 for (auto &MI : MBB) {
415 if (!MI.isPHI())
416 break;
417
418 // First we scan the operands of the PHI looking for duplicate entries
419 // for a particular predecessor. We retain the operand index of each duplicate
420 // entry found.
421 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
422 OpIdx += 2)
423 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
424 DupIndices.push_back(OpIdx);
425
426 // Now walk the duplicate indices, removing both the block and value. Note
427 // that these are stored as a vector making this element-wise removal
429 // potentially quadratic.
430 //
431 // FIXME: It is really frustrating that we have to use a quadratic
432 // removal algorithm here. There should be a better way, but the use-def
433 // updates required make that impossible using the public API.
434 //
435 // Note that we have to process these backwards so that we don't
436 // invalidate other indices with each removal.
437 while (!DupIndices.empty()) {
438 int OpIdx = DupIndices.pop_back_val();
439 // Remove both the block and value operand, again in reverse order to
440 // preserve indices.
441 MI.RemoveOperand(OpIdx + 1);
442 MI.RemoveOperand(OpIdx);
443 }
444
445 Preds.clear();
446 }
447
448 // Track the updated values in an SSA updater to rewrite into SSA form at the
449 // end.
450 MachineSSAUpdater PredStateSSA(MF);
451 PredStateSSA.Initialize(PredStateReg);
452 PredStateSSA.AddAvailableValue(&Entry, PredStateReg);
453 // Collect the inserted instructions so we can rewrite their uses of the
454 // predicate state into SSA form.
455 SmallVector<MachineInstr *, 16> CMovs;
456
457 // Now walk all of the basic blocks looking for ones that end in conditional
458 // jumps where we need to update this register along each edge.
459 for (BlockCondInfo &Info : Infos) {
460 MachineBasicBlock &MBB = *Info.MBB;
461 SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
462 MachineInstr *UncondBr = Info.UncondBr;
463
464 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
465 << "\n");
466 ++NumCondBranchesTraced;
467
468 // Compute the non-conditional successor as either the target of any
469 // unconditional branch or the layout successor.
470 MachineBasicBlock *UncondSucc =
471 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
472 ? UncondBr->getOperand(0).getMBB()
473 : nullptr)
474 : &*std::next(MachineFunction::iterator(&MBB));
475
476 // Count how many edges there are to any given successor.
477 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
478 if (UncondSucc)
479 ++SuccCounts[UncondSucc];
480 for (auto *CondBr : CondBrs)
481 ++SuccCounts[CondBr->getOperand(0).getMBB()];
482
483 // A lambda to insert cmov instructions into a block checking all of the
484 // condition codes in a sequence.
485 auto BuildCheckingBlockForSuccAndConds =
486 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
487 MachineInstr *Br, MachineInstr *&UncondBr,
488 ArrayRef<X86::CondCode> Conds) {
489 // First, we split the edge to insert the checking block into a safe
490 // location.
491 auto &CheckingMBB =
492 (SuccCount == 1 && Succ.pred_size() == 1)
493 ? Succ
494 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
495
496 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
497 if (!LiveEFLAGS)
498 CheckingMBB.addLiveIn(X86::EFLAGS);
499
500 // Now insert the cmovs to implement the checks.
501 auto InsertPt = CheckingMBB.begin();
502 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
503 "Should never have a PHI in the initial checking block as it "
504 "always has a single predecessor!");
505
506 // We will chain each cmov into the next, but need to start with the
507 // incoming pred state.
508 unsigned CurStateReg = PredStateReg;
509
510 for (X86::CondCode Cond : Conds) {
511 auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes);
512
513 unsigned UpdatedStateReg = MRI->createVirtualRegister(PredStateRC);
514 auto CMovI = BuildMI(CheckingMBB, InsertPt, Loc, TII->get(CMovOp),
515 UpdatedStateReg)
516 .addReg(CurStateReg)
517 .addReg(PoisonReg);
518 // If this is the last cmov and the EFLAGS weren't originally
519 // live-in, mark them as killed.
520 if (!LiveEFLAGS && Cond == Conds.back())
521 CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
522
523 ++NumInstsInserted;
524 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
525 dbgs() << "\n");
526
527 // The first one of the cmovs will be using the top level
528 // `PredStateReg` and needs to get rewritten into SSA form.
529 if (CurStateReg == PredStateReg)
530 CMovs.push_back(&*CMovI);
531
532 // The next cmov should start from this one's def.
533 CurStateReg = UpdatedStateReg;
534 }
535
536 // And put the last one into the available values for PredStateSSA.
537 PredStateSSA.AddAvailableValue(&CheckingMBB, CurStateReg);
538 };
539
540 std::vector<X86::CondCode> UncondCodeSeq;
541 for (auto *CondBr : CondBrs) {
542 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
543 int &SuccCount = SuccCounts[&Succ];
544
545 X86::CondCode Cond = X86::getCondFromBranchOpc(CondBr->getOpcode());
546 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
547 UncondCodeSeq.push_back(Cond);
548
549 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
550 {InvCond});
551
552 // Decrement the successor count now that we've split one of the edges.
553 // We need to keep the count of edges to the successor accurate in order
554 // to know above when to *replace* the successor in the CFG vs. just
555 // adding the new successor.
556 --SuccCount;
557 }
558
559 // Since we may have split edges and changed the number of successors,
560 // normalize the probabilities. This avoids doing it each time we split an
561 // edge.
562 MBB.normalizeSuccProbs();
563
564 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
565 // need to intersect the other condition codes. We can do this by just
566 // doing a cmov for each one.
567 if (!UncondSucc)
568 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
569 // just skip this and continue.
570 continue;
571
572 assert(SuccCounts[UncondSucc] == 1 &&
573 "We should never have more than one edge to the unconditional "
574 "successor at this point because every other edge must have been "
575 "split above!");
576
577 // Sort and unique the codes to minimize them.
578 llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
579 UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
580 UncondCodeSeq.end());
581
582 // Build a checking version of the successor.
583 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
584 UncondBr, UncondBr, UncondCodeSeq);
585 }
586
587 // We may also enter basic blocks in this function via exception handling
588 // control flow. Here, if we are hardening interprocedurally, we need to
589 // re-capture the predicate state from the throwing code. In the Itanium ABI,
590 // the throw will always look like a call to __cxa_throw and will have the
591 // predicate state in the stack pointer, so extract fresh predicate state from
592 // the stack pointer and make it available in SSA.
593 // FIXME: Handle non-Itanium ABI EH models.
594 if (HardenInterprocedurally) {
595 for (MachineBasicBlock &MBB : MF) {
596 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
597 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
598 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
599 if (!MBB.isEHPad())
600 continue;
601 PredStateSSA.AddAvailableValue(
602 &MBB,
603 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
604 }
605 }
606
607 // Now check all of the loads using the predicate state.
608 checkAllLoads(MF, PredStateSSA);
609
610 // Now rewrite all the uses of the pred state using the SSA updater so that
611 // we track updates through the CFG.
612 for (MachineInstr *CMovI : CMovs)
613 for (MachineOperand &Op : CMovI->operands()) {
614 if (!Op.isReg() || Op.getReg() != PredStateReg)
615 continue;
616
617 PredStateSSA.RewriteUse(Op);
618 }
619
620 // If we are hardening interprocedurally, find each returning block and
621 // protect the caller from being returned to through misspeculation.
622 if (HardenInterprocedurally)
623 for (MachineBasicBlock &MBB : MF) {
624 if (MBB.empty())
625 continue;
626
627 MachineInstr &MI = MBB.back();
628 if (!MI.isReturn())
629 continue;
630
631 checkReturnInstr(MI, PredStateSSA);
632 }
633
634 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
635 dbgs() << "\n"; MF.verify(this));
636 return true;
637}
638
639/// Implements the naive hardening approach of putting an LFENCE after every
640/// potentially mis-predicted control flow construct.
641///
642/// We include this as an alternative mostly for the purpose of comparison. The
643/// performance impact of this is expected to be extremely severe and not
644/// practical for any real-world users.
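///
/// As a rough sketch (the assembly and labels are illustrative only), a block
/// ending in:
///
///   cmpq %rsi, %rdi
///   jae  .LBB0_2
/// .LBB0_1:                          # fallthrough successor
///   ...
/// .LBB0_2:                          # branch target
///   ...
///
/// simply gets an `lfence` inserted at the top of both `.LBB0_1` and
/// `.LBB0_2` (EH pads are skipped), stopping speculation outright rather than
/// tracing predicate state through the CFG.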
645void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
646 MachineFunction &MF) {
647 // First, we scan the function looking for blocks that are reached along edges
648 // that we might want to harden.
649 SmallSetVector<MachineBasicBlock *, 8> Blocks;
650 for (MachineBasicBlock &MBB : MF) {
651 // If the block has zero or one successors, there is nothing to do here.
652 if (MBB.succ_size() <= 1)
653 continue;
654
655 // Skip blocks unless their terminators start with a branch. Other
656 // terminators don't seem interesting for guarding against misspeculation.
657 auto TermIt = MBB.getFirstTerminator();
658 if (TermIt == MBB.end() || !TermIt->isBranch())
659 continue;
660
661 // Add all the non-EH-pad successors to the blocks we want to harden. We
662 // skip EH pads because there isn't really a condition of interest on
663 // entering.
664 for (MachineBasicBlock *SuccMBB : MBB.successors())
665 if (!SuccMBB->isEHPad())
666 Blocks.insert(SuccMBB);
667 }
668
669 for (MachineBasicBlock *MBB : Blocks) {
670 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
671 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
672 ++NumInstsInserted;
673 ++NumLFENCEsInserted;
674 }
675}
676
677SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
678X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
679 SmallVector<BlockCondInfo, 16> Infos;
680
681 // Walk the function and build up a summary for each block's conditions that
682 // we need to trace through.
683 for (MachineBasicBlock &MBB : MF) {
684 // If there are no or only one successor, nothing to do here.
685 if (MBB.succ_size() <= 1)
686 continue;
687
688 // We want to reliably handle any conditional branch terminators in the
689 // MBB, so we manually analyze the branch. We can handle all of the
690 // permutations here, including ones that analyzeBranch cannot.
691 //
692 // The approach is to walk backwards across the terminators, resetting at
693 // any unconditional non-indirect branch, and track all conditional edges
694 // to basic blocks as well as the fallthrough or unconditional successor
695 // edge. For each conditional edge, we track the target and the opposite
696 // condition code in order to inject a "no-op" cmov into that successor
697 // that will harden the predicate. For the fallthrough/unconditional
698 // edge, we inject a separate cmov for each conditional branch with
699 // matching condition codes. This effectively implements an "and" of the
700 // condition flags, even if there isn't a single condition flag that would
701 // directly implement that. We don't bother trying to optimize either of
702 // these cases because if such an optimization is possible, LLVM should
703 // have optimized the conditional *branches* in that way already to reduce
704 // instruction count. This late, we simply assume the minimal number of
705 // branch instructions is being emitted and use that to guide our cmov
706 // insertion.
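  //
  // As a rough, purely illustrative example, a block ending in:
  //
  //   jne .LBB0_1
  //   jmp .LBB0_2
  //
  // is summarized here so that the predicate-state tracing code can later
  // insert roughly:
  //
  //   .LBB0_1:                    # Reached when "ne"; misspeculated if "e".
  //     cmoveq  %poison, %pred_state
  //   .LBB0_2:                    # Reached when "e"; misspeculated if "ne".
  //     cmovneq %poison, %pred_state
  //
  // where %poison and %pred_state stand in for the virtual registers the
  // pass creates.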
707
708 BlockCondInfo Info = {&MBB, {}, nullptr};
709
710 // Now walk backwards through the terminators and build up successors they
711 // reach and the conditions.
712 for (MachineInstr &MI : llvm::reverse(MBB)) {
713 // Once we've handled all the terminators, we're done.
714 if (!MI.isTerminator())
715 break;
716
717 // If we see a non-branch terminator, we can't handle anything so bail.
718 if (!MI.isBranch()) {
719 Info.CondBrs.clear();
720 break;
721 }
722
723 // If we see an unconditional branch, reset our state, clear any
724 // fallthrough, and set this as the "else" successor.
725 if (MI.getOpcode() == X86::JMP_1) {
726 Info.CondBrs.clear();
727 Info.UncondBr = &MI;
728 continue;
729 }
730
731 // If we get an invalid condition, we have an indirect branch or some
732 // other unanalyzable "fallthrough" case. We model this as a nullptr for
733 // the destination so we can still guard any conditional successors.
734 // Consider code sequences like:
735 // ```
736 // jCC L1
737 // jmpq *%rax
738 // ```
739 // We still want to harden the edge to `L1`.
740 if (X86::getCondFromBranchOpc(MI.getOpcode()) == X86::COND_INVALID) {
741 Info.CondBrs.clear();
742 Info.UncondBr = &MI;
743 continue;
744 }
745
746 // We have a vanilla conditional branch, add it to our list.
747 Info.CondBrs.push_back(&MI);
748 }
749 if (Info.CondBrs.empty()) {
750 ++NumBranchesUntraced;
751 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
752 MBB.dump());
753 continue;
754 }
755
756 Infos.push_back(Info);
757 }
758
759 return Infos;
760}
761
762/// Returns true if the instruction has no behavior (specified or otherwise)
763/// that is based on the value of any of its register operands.
764///
765/// A classical example of something that is inherently not data invariant is an
766/// indirect jump -- the destination is loaded into icache based on the bits set
767/// in the jump destination register.
768///
769/// FIXME: This should become part of our instruction tables.
770static bool isDataInvariant(MachineInstr &MI) {
771 switch (MI.getOpcode()) {
772 default:
773 // By default, assume that the instruction is not data invariant.
774 return false;
775
776 // FIXME: For now, we just use a very boring, conservative set of unary
777 // instructions because we're mostly interested in handling simple
778 // transformations.
779 case TargetOpcode::COPY:
780 return true;
781 }
782}
783
784/// Returns true if the instruction has no behavior (specified or otherwise)
785/// that is based on the value loaded from memory or the value of any
786/// non-address register operands.
787///
788/// For example, this excludes any instruction whose latency depends on the
789/// particular bits set in any of the registers *or* any of the bits loaded from
790/// memory.
791///
792/// A classical example of something that is inherently not data invariant is an
793/// indirect jump -- the destination is loaded into icache based on the bits set
794/// in the jump destination register.
795///
796/// FIXME: This should become part of our instruction tables.
797static bool isDataInvariantLoad(MachineInstr &MI) {
798 switch (MI.getOpcode()) {
799 default:
800 // By default, assume that the load will immediately leak.
801 return false;
802
803 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
804 // However, these forms set flags and are perhaps the most surprising of the
805 // constant-time operations, so we call them out here separately.
806 case X86::IMUL16rm:
807 case X86::IMUL16rmi8:
808 case X86::IMUL16rmi:
809 case X86::IMUL32rm:
810 case X86::IMUL32rmi8:
811 case X86::IMUL32rmi:
812 case X86::IMUL64rm:
813 case X86::IMUL64rmi32:
814 case X86::IMUL64rmi8:
815
816 // Bit-field and bit-scanning instructions that scan across bits and do
817 // other fairly complex operations like popcnt, yet are somewhat
818 // surprisingly believed to be constant time on x86.
819 // However, these set flags.
820 case X86::BLCFILL32rm:
821 case X86::BLCFILL64rm:
822 case X86::BLCI32rm:
823 case X86::BLCI64rm:
824 case X86::BLCIC32rm:
825 case X86::BLCIC64rm:
826 case X86::BLCMSK32rm:
827 case X86::BLCMSK64rm:
828 case X86::BLCS32rm:
829 case X86::BLCS64rm:
830 case X86::BLSFILL32rm:
831 case X86::BLSFILL64rm:
832 case X86::BLSI32rm:
833 case X86::BLSI64rm:
834 case X86::BLSIC32rm:
835 case X86::BLSIC64rm:
836 case X86::BLSMSK32rm:
837 case X86::BLSMSK64rm:
838 case X86::BLSR32rm:
839 case X86::BLSR64rm:
840 case X86::BZHI32rm:
841 case X86::BZHI64rm:
842 case X86::LZCNT16rm:
843 case X86::LZCNT32rm:
844 case X86::LZCNT64rm:
845 case X86::POPCNT16rm:
846 case X86::POPCNT32rm:
847 case X86::POPCNT64rm:
848 case X86::TZCNT16rm:
849 case X86::TZCNT32rm:
850 case X86::TZCNT64rm:
851 case X86::TZMSK32rm:
852 case X86::TZMSK64rm:
853
854 // Basic arithmetic is constant time on the input but does set flags.
855 case X86::ADC8rm:
856 case X86::ADC16rm:
857 case X86::ADC32rm:
858 case X86::ADC64rm:
859 case X86::ADCX32rm:
860 case X86::ADCX64rm:
861 case X86::ADD8rm:
862 case X86::ADD16rm:
863 case X86::ADD32rm:
864 case X86::ADD64rm:
865 case X86::ADOX32rm:
866 case X86::ADOX64rm:
867 case X86::AND8rm:
868 case X86::AND16rm:
869 case X86::AND32rm:
870 case X86::AND64rm:
871 case X86::ANDN32rm:
872 case X86::ANDN64rm:
873 case X86::BSF16rm:
874 case X86::BSF32rm:
875 case X86::BSF64rm:
876 case X86::BSR16rm:
877 case X86::BSR32rm:
878 case X86::BSR64rm:
879 case X86::OR8rm:
880 case X86::OR16rm:
881 case X86::OR32rm:
882 case X86::OR64rm:
883 case X86::SBB8rm:
884 case X86::SBB16rm:
885 case X86::SBB32rm:
886 case X86::SBB64rm:
887 case X86::SUB8rm:
888 case X86::SUB16rm:
889 case X86::SUB32rm:
890 case X86::SUB64rm:
891 case X86::XOR8rm:
892 case X86::XOR16rm:
893 case X86::XOR32rm:
894 case X86::XOR64rm:
895 case X86::BEXTR32rm:
896 case X86::BEXTR64rm:
897 case X86::BEXTRI32mi:
898 case X86::BEXTRI64mi:
899 // Check whether the EFLAGS implicit-def is dead. We assume that this will
900 // always find the implicit-def because this code should only be reached
901 // for instructions that do in fact implicitly def this.
902 if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) {
903 // If we would clobber EFLAGS that are used, just bail for now.
904 LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: ";
905 MI.dump(); dbgs() << "\n");
906 return false;
907 }
908
909 // Otherwise, fallthrough to handle these the same as instructions that
910 // don't set EFLAGS.
911 LLVM_FALLTHROUGH;
912
913 // Integer multiply w/o affecting flags is still believed to be constant
914 // time on x86. Called out separately as this is among the most surprising
915 // instructions to exhibit that behavior.
916 case X86::MULX32rm:
917 case X86::MULX64rm:
918
919 // Arithmetic instructions that are both constant time and don't set flags.
920 case X86::PDEP32rm:
921 case X86::PDEP64rm:
922 case X86::PEXT32rm:
923 case X86::PEXT64rm:
924 case X86::RORX32mi:
925 case X86::RORX64mi:
926 case X86::SARX32rm:
927 case X86::SARX64rm:
928 case X86::SHLX32rm:
929 case X86::SHLX64rm:
930 case X86::SHRX32rm:
931 case X86::SHRX64rm:
932
933 // Conversions are believed to be constant time and don't set flags.
934 // FIXME: Add AVX versions.
935 case X86::CVTSD2SI64rm_Int:
936 case X86::CVTSD2SIrm_Int:
937 case X86::CVTSS2SI64rm_Int:
938 case X86::CVTSS2SIrm_Int:
939 case X86::CVTTSD2SI64rm:
940 case X86::CVTTSD2SI64rm_Int:
941 case X86::CVTTSD2SIrm:
942 case X86::CVTTSD2SIrm_Int:
943 case X86::CVTTSS2SI64rm:
944 case X86::CVTTSS2SI64rm_Int:
945 case X86::CVTTSS2SIrm:
946 case X86::CVTTSS2SIrm_Int:
947
948 // Loads to register don't set flags.
949 case X86::MOV8rm:
950 case X86::MOV8rm_NOREX:
951 case X86::MOV16rm:
952 case X86::MOV32rm:
953 case X86::MOV64rm:
954 case X86::MOVSX16rm8:
955 case X86::MOVSX32rm16:
956 case X86::MOVSX32rm8:
957 case X86::MOVSX32rm8_NOREX:
958 case X86::MOVSX64rm16:
959 case X86::MOVSX64rm32:
960 case X86::MOVSX64rm8:
961 case X86::MOVZX16rm8:
962 case X86::MOVZX32rm16:
963 case X86::MOVZX32rm8:
964 case X86::MOVZX32rm8_NOREX:
965 case X86::MOVZX64rm16:
966 case X86::MOVZX64rm8:
967 return true;
968 }
969}
970
971static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
972 const TargetRegisterInfo &TRI) {
973 // Check if EFLAGS are alive by seeing if there is a def of them or they are
974 // live-in, and then seeing if that def is in turn used.
975 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
976 if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
977 // If the def is dead, then EFLAGS is not live.
978 if (DefOp->isDead())
979 return false;
980
981 // Otherwise we've def'ed it, and it is live.
982 return true;
983 }
984 // While at this instruction, also check if we use and kill EFLAGS
985 // which means it isn't live.
986 if (MI.killsRegister(X86::EFLAGS, &TRI))
987 return false;
988 }
989
990 // If we didn't find anything conclusive (neither definitely alive or
991 // definitely dead) return whether it lives into the block.
992 return MBB.isLiveIn(X86::EFLAGS);
993}
994
995void X86SpeculativeLoadHardeningPass::checkAllLoads(
996 MachineFunction &MF, MachineSSAUpdater &PredStateSSA) {
997 // If the actual checking of loads is disabled, skip doing anything here.
998 if (!HardenLoads)
999 return;
1000
1001 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1002 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1003
1004 SmallSet<unsigned, 16> HardenedAddrRegs;
1005
1006 SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1007
1008 // Track the set of load-dependent registers through the basic block. Because
1009 // the values of these registers have an existing data dependency on a loaded
1010 // value which we would have checked, we can omit any checks on them.
1011 SparseBitVector<> LoadDepRegs;
1012
1013 for (MachineBasicBlock &MBB : MF) {
1014 // We harden the loads of a basic block in several passes:
1015 //
1016 // 1) Collect all the loads which can have their loaded value hardened
1017 // and all the loads that instead need their address hardened. During
1018 // this walk we propagate load dependence for address hardened loads and
1019 // also look for LFENCE to stop hardening wherever possible. When
1020 // deciding whether or not to harden the loaded value or not, we check
1021 // to see if any registers used in the address will have been hardened
1022 // at this point and if so, harden any remaining address registers as
1023 // that often successfully re-uses hardened addresses and minimizes
1024 // instructions. FIXME: We should consider an aggressive mode where we
1025 // continue to keep as many loads value hardened even when some address
1026 // register hardening would be free (due to reuse).
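    //
    // For illustration (registers and syntax are placeholders), in a
    // sequence such as:
    //
    //   movq (%rdi,%rsi,8), %rax   # First load: either %rdi and %rsi are
    //                              # hardened, or %rax is hardened post-load.
    //   movq (%rax), %rcx          # Second load: %rax already comes from a
    //                              # hardened load, so typically no extra
    //                              # hardening instructions are needed here.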
1027 for (MachineInstr &MI : MBB) {
1028 // We naively assume that all def'ed registers of an instruction have
1029 // a data dependency on all of their operands.
1030 // FIXME: Do a more careful analysis of x86 to build a conservative model
1031 // here.
1032 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1033 return Op.isReg() && LoadDepRegs.test(Op.getReg());
1034 }))
1035 for (MachineOperand &Def : MI.defs())
1036 if (Def.isReg())
1037 LoadDepRegs.set(Def.getReg());
1038
1039 // Both Intel and AMD are guiding that they will change the semantics of
1040 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1041 // no more need to guard things in this block.
1042 if (MI.getOpcode() == X86::LFENCE)
1043 break;
1044
1045 // If this instruction cannot load, nothing to do.
1046 if (!MI.mayLoad())
1047 continue;
1048
1049 // Some instructions which "load" are trivially safe or unimportant.
1050 if (MI.getOpcode() == X86::MFENCE)
1051 continue;
1052
1053 // Extract the memory operand information about this instruction.
1054 // FIXME: This doesn't handle loading pseudo instructions which we often
1055 // could handle with similarly generic logic. We probably need to add an
1056 // MI-layer routine similar to the MC-layer one we use here which maps
1057 // pseudos much like this maps real instructions.
1058 const MCInstrDesc &Desc = MI.getDesc();
1059 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1060 if (MemRefBeginIdx < 0) {
1061 LLVM_DEBUG(dbgs() << "WARNING: unable to harden loading instruction: ";
1062 MI.dump());
1063 continue;
1064 }
1065
1066 MemRefBeginIdx += X86II::getOperandBias(Desc);
1067
1068 MachineOperand &BaseMO = MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1069 MachineOperand &IndexMO =
1070 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1071
1072 // If we have at least one (non-frame-index, non-RIP) register operand,
1073 // and neither operand is load-dependent, we need to check the load.
1074 unsigned BaseReg = 0, IndexReg = 0;
1075 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1076 BaseMO.getReg() != X86::NoRegister)
1077 BaseReg = BaseMO.getReg();
1078 if (IndexMO.getReg() != X86::NoRegister)
1079 IndexReg = IndexMO.getReg();
1080
1081 if (!BaseReg && !IndexReg)
1082 // No register operands!
1083 continue;
1084
1085 // If any register operand is dependent, this load is dependent and we
1086 // needn't check it.
1087 // FIXME: Is this true in the case where we are hardening loads after
1088 // they complete? Unclear, need to investigate.
1089 if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1090 (IndexReg && LoadDepRegs.test(IndexReg)))
1091 continue;
1092
1093 // If post-load hardening is enabled, this load is known to be
1094 // data-invariant, and we aren't already going to harden one of the
1095 // address registers, queue it up to be hardened post-load. Notably, even
1096 // once hardened this won't introduce a useful dependency that could prune
1097 // out subsequent loads.
1098 if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
1099 !HardenedAddrRegs.count(BaseReg) &&
1100 !HardenedAddrRegs.count(IndexReg)) {
1101 HardenPostLoad.insert(&MI);
1102 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1103 continue;
1104 }
1105
1106 // Record this instruction for address hardening and record its register
1107 // operands as being address-hardened.
1108 HardenLoadAddr.insert(&MI);
1109 if (BaseReg)
1110 HardenedAddrRegs.insert(BaseReg);
1111 if (IndexReg)
1112 HardenedAddrRegs.insert(IndexReg);
1113
1114 for (MachineOperand &Def : MI.defs())
1115 if (Def.isReg())
1116 LoadDepRegs.set(Def.getReg());
1117 }
1118
1119 // Now re-walk the instructions in the basic block, and apply whichever
1120 // hardening strategy we have elected. Note that we do this in a second
1121 // pass specifically so that we have the complete set of instructions for
1122 // which we will do post-load hardening and can defer it in certain
1123 // circumstances.
1124 //
1125 // FIXME: This could probably be made even more effective by doing it
1126 // across the entire function. Rather than just walking the flat list
1127 // backwards here, we could walk the function in PO and each block bottom
1128 // up, allowing us in some cases to sink hardening across basic blocks. As
1129 // long as the in-block predicate state is used at the eventual hardening
1130 // site, this remains safe.
1131 for (MachineInstr &MI : MBB) {
1132 // We cannot both require hardening the def of a load and its address.
1133 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1134 "Requested to harden both the address and def of a load!");
1135
1136 // Check if this is a load whose address needs to be hardened.
1137 if (HardenLoadAddr.erase(&MI)) {
1138 const MCInstrDesc &Desc = MI.getDesc();
1139 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1140 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1141
1142 MemRefBeginIdx += X86II::getOperandBias(Desc);
1143
1144 MachineOperand &BaseMO =
1145 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1146 MachineOperand &IndexMO =
1147 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1148 hardenLoadAddr(MI, BaseMO, IndexMO, PredStateSSA, AddrRegToHardenedReg);
1149 continue;
1150 }
1151
1152 // Test if this instruction is one of our post load instructions (and
1153 // remove it from the set if so).
1154 if (HardenPostLoad.erase(&MI)) {
1155 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1156
1157 // If this is a data-invariant load, we want to try to sink any
1158 // hardening as far as possible.
1159 if (isDataInvariantLoad(MI)) {
1160 // Sink the instruction we'll need to harden as far as we can down the
1161 // graph.
1162 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1163
1164 // If we managed to sink this instruction, update everything so we
1165 // harden that instruction when we reach it in the instruction
1166 // sequence.
1167 if (SunkMI != &MI) {
1168 // If in sinking there was no instruction needing to be hardened,
1169 // we're done.
1170 if (!SunkMI)
1171 continue;
1172
1173 // Otherwise, add this to the set of defs we harden.
1174 HardenPostLoad.insert(SunkMI);
1175 continue;
1176 }
1177 }
1178
1179 // The register def'ed by this instruction is trivially hardened so map
1180 // it to itself.
1181 AddrRegToHardenedReg[MI.getOperand(0).getReg()] =
1182 MI.getOperand(0).getReg();
1183
1184 hardenPostLoad(MI, PredStateSSA);
1185 continue;
1186 }
1187
1188 // After we finish processing the instruction and doing any hardening
1189 // necessary for it, we need to handle transferring the predicate state
1190 // into a call and recovering it after the call returns (if it returns).
1191 if (!MI.isCall())
1192 continue;
1193
1194 // If we're not hardening interprocedurally, we can just skip calls.
1195 if (!HardenInterprocedurally)
1196 continue;
1197
1198 auto InsertPt = MI.getIterator();
1199 DebugLoc Loc = MI.getDebugLoc();
1200
1201 // First, we transfer the predicate state into the called function by
1202 // merging it into the stack pointer. This will kill the current def of
1203 // the state.
1204 unsigned StateReg = PredStateSSA.GetValueAtEndOfBlock(&MBB);
1205 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
1206
1207 // If this call is also a return (because it is a tail call) we're done.
1208 if (MI.isReturn())
1209 continue;
1210
1211 // Otherwise we need to step past the call and recover the predicate
1212 // state from SP after the return, and make this new state available.
1213 ++InsertPt;
1214 unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
1215 PredStateSSA.AddAvailableValue(&MBB, NewStateReg);
1216 }
1217
1218 HardenPostLoad.clear();
1219 HardenLoadAddr.clear();
1220 HardenedAddrRegs.clear();
1221 AddrRegToHardenedReg.clear();
1222
1223 // Currently, we only track data-dependent loads within a basic block.
1224 // FIXME: We should see if this is necessary or if we could be more
1225 // aggressive here without opening up attack avenues.
1226 LoadDepRegs.clear();
1227 }
1228}
1229
1230/// Save EFLAGS into the returned GPR. This can in turn be restored with
1231/// `restoreEFLAGS`.
1232///
1233/// Note that LLVM can only lower very simple patterns of saved and restored
1234/// EFLAGS registers. The restore should always be within the same basic block
1235/// as the save so that no PHI nodes are inserted.
1236unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1237 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1238 DebugLoc Loc) {
1239 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1240 // what instruction selection does.
1241 unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1242 // We directly copy the FLAGS register and rely on later lowering to clean
1243 // this up into the appropriate setCC instructions.
1244 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1245 ++NumInstsInserted;
1246 return Reg;
1247}
1248
1249/// Restore EFLAGS from the provided GPR. This should be produced by
1250/// `saveEFLAGS`.
1251///
1252/// This must be done within the same basic block as the save in order to
1253/// reliably lower.
1254void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1255 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1256 unsigned Reg) {
1257 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1258 ++NumInstsInserted;
1259}
1260
1261/// Takes the current predicate state (in a register) and merges it into the
1262/// stack pointer. The state is essentially a single bit, but we merge this in
1263/// a way that won't form non-canonical pointers and also will be preserved
1264/// across normal stack adjustments.
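///
/// Concretely, the code emitted below amounts to roughly the following, with
/// placeholder names for the virtual registers involved:
///
///   shlq $47, %pred_state_tmp    # Move the zero / all-ones state up into
///                                # the high bits of a scratch copy.
///   orq  %pred_state_tmp, %rsp   # Fold it into %rsp so the callee (or the
///                                # caller, on return) can recover it.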
1265void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1266 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1267 unsigned PredStateReg) {
1268 unsigned TmpReg = MRI->createVirtualRegister(PredStateRC);
1269 // FIXME: This hard codes a shift distance based on the number of bits needed
1270 // to stay canonical on 64-bit. We should compute this somehow and support
1271 // 32-bit as part of that.
1272 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1273 .addReg(PredStateReg, RegState::Kill)
1274 .addImm(47);
1275 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1276 ++NumInstsInserted;
1277 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1278 .addReg(X86::RSP)
1279 .addReg(TmpReg, RegState::Kill);
1280 OrI->addRegisterDead(X86::EFLAGS, TRI);
1281 ++NumInstsInserted;
1282}
1283
1284/// Extracts the predicate state stored in the high bits of the stack pointer.
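///
/// Roughly, with placeholder register names, this emits:
///
///   movq %rsp, %tmp
///   sarq $63, %tmp      # The arithmetic shift smears the sign bit, yielding
///                       # an all-zeros or all-ones predicate state again.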
1285unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1286 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1287 DebugLoc Loc) {
1288 unsigned PredStateReg = MRI->createVirtualRegister(PredStateRC);
1289 unsigned TmpReg = MRI->createVirtualRegister(PredStateRC);
1290
1291 // We know that the stack pointer will have any preserved predicate state in
1292 // its high bit. We just want to smear this across the other bits. Turns out,
1293 // this is exactly what an arithmetic right shift does.
1294 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1295 .addReg(X86::RSP);
1296 auto ShiftI =
1297 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1298 .addReg(TmpReg, RegState::Kill)
1299 .addImm(TRI->getRegSizeInBits(*PredStateRC) - 1);
1300 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1301 ++NumInstsInserted;
1302
1303 return PredStateReg;
1304}
1305
1306void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1307 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1308 MachineSSAUpdater &PredStateSSA,
1309 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1310 MachineBasicBlock &MBB = *MI.getParent();
1311 DebugLoc Loc = MI.getDebugLoc();
1312
1313 // Check if EFLAGS are alive by seeing if there is a def of them or they are
1314 // live-in, and then seeing if that def is in turn used.
1315 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1316
1317 SmallVector<MachineOperand *, 2> HardenOpRegs;
1318
1319 if (BaseMO.isFI()) {
1320 // A frame index is never a dynamically controllable load, so only
1321 // harden it if we're covering fixed address loads as well.
1322 LLVM_DEBUG(
1323 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1324 MI.dump(); dbgs() << "\n");
1325 } else if (BaseMO.getReg() == X86::RIP ||
1326 BaseMO.getReg() == X86::NoRegister) {
1327 // For both RIP-relative addressed loads or absolute loads, we cannot
1328 // meaningfully harden them because the address being loaded has no
1329 // dynamic component.
1330 //
1331 // FIXME: When using a segment base (like TLS does) we end up with the
1332 // dynamic address being the base plus -1 because we can't mutate the
1333 // segment register here. This allows the signed 32-bit offset to point at
1334 // valid segment-relative addresses and load them successfully.
1335 LLVM_DEBUG(
1336 dbgs() << " Cannot harden base of "
1337 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1338 << " address in a load!");
1339 } else {
1340 assert(BaseMO.isReg() &&
1341 "Only allowed to have a frame index or register base.");
1342 HardenOpRegs.push_back(&BaseMO);
1343 }
1344
1345 if (IndexMO.getReg() != X86::NoRegister &&
1346 (HardenOpRegs.empty() ||
1347 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1348 HardenOpRegs.push_back(&IndexMO);
1349
1350 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1351 "Should have exactly one or two registers to harden!");
1352 assert((HardenOpRegs.size() == 1 ||
1353 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1354 "Should not have two of the same registers!");
1355
1356 // Remove any registers that have already been checked.
1357 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1358 // See if this operand's register has already been checked.
1359 auto It = AddrRegToHardenedReg.find(Op->getReg());
1360 if (It == AddrRegToHardenedReg.end())
1361 // Not checked, so retain this one.
1362 return false;
1363
1364 // Otherwise, we can directly update this operand and remove it.
1365 Op->setReg(It->second);
1366 return true;
1367 });
1368 // If there are none left, we're done.
1369 if (HardenOpRegs.empty())
1370 return;
1371
1372 // Compute the current predicate state.
1373 unsigned StateReg = PredStateSSA.GetValueAtEndOfBlock(&MBB);
1374
1375 auto InsertPt = MI.getIterator();
1376
1377 // If EFLAGS are live and we don't have access to instructions that avoid
1378 // clobbering EFLAGS we need to save and restore them. This in turn makes
1379 // the EFLAGS no longer live.
1380 unsigned FlagsReg = 0;
1381 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1382 EFLAGSLive = false;
1383 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1384 }
1385
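  // For each register we harden, the loop below emits (roughly, and with
  // placeholder names) one of two forms:
  //
  //   orq  %pred_state, %addr_reg             # EFLAGS dead: an all-ones
  //                                           # state turns the pointer
  //                                           # into -1.
  //   shrxq %pred_state, %addr_reg, %hardened # EFLAGS live (BMI2): an
  //                                           # all-ones state shifts the
  //                                           # address down to 0 or 1.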
1386 for (MachineOperand *Op : HardenOpRegs) {
1387 auto *OpRC = MRI->getRegClass(Op->getReg());
1388
1389 unsigned OpReg = Op->getReg();
1390 unsigned TmpReg = MRI->createVirtualRegister(OpRC);
1391
1392 if (!EFLAGSLive) {
1393 // Merge our potential poison state into the value with an or.
1394 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1395 .addReg(StateReg)
1396 .addReg(OpReg);
1397 OrI->addRegisterDead(X86::EFLAGS, TRI);
1398 ++NumInstsInserted;
1399 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1400 } else {
1401 // We need to avoid touching EFLAGS so shift out all but the least
1402 // significant bit using the instruction that doesn't update flags.
1403 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1404 .addReg(OpReg)
1405 .addReg(StateReg);
1406 (void)ShiftI;
1407 ++NumInstsInserted;
1408 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1409 dbgs() << "\n");
1410 }
1411
1412 // Record this register as checked and update the operand.
1413 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1414 "Should not have checked this register yet!");
1415 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1416 Op->setReg(TmpReg);
1417 ++NumAddrRegsHardened;
1418 }
1419
1420 // And restore the flags if needed.
1421 if (FlagsReg)
1422 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1423}
1424
1425MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1426 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedLoads) {
1427 assert(isDataInvariantLoad(InitialMI) &&
1428 "Cannot get here with a non-invariant load!");
1429
1430 // See if we can sink hardening the loaded value.
1431 auto SinkCheckToSingleUse =
1432 [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1433 unsigned DefReg = MI.getOperand(0).getReg();
1434
1435 // We need to find a single use to which we can sink the check. We can
1436 // primarily do this because many uses may already end up checked on their
1437 // own.
1438 MachineInstr *SingleUseMI = nullptr;
1439 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1440 // If we're already going to harden this use, it is data invariant and
1441 // within our block and we just need to check that the use isn't in an
1442 // address.
1443 if (HardenedLoads.count(&UseMI)) {
1444 const MCInstrDesc &Desc = UseMI.getDesc();
1445 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1446 assert(MemRefBeginIdx >= 0 &&
1447 "Should always have mem references here!");
1448 MemRefBeginIdx += X86II::getOperandBias(Desc);
1449
1450 MachineOperand &BaseMO =
1451 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1452 MachineOperand &IndexMO =
1453 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1454 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1455 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1456 // The load uses the register as part of its address making it not
1457 // invariant.
1458 return {};
1459
1460 continue;
1461 }
1462
1463 if (SingleUseMI)
1464 // We already have a single use, this would make two. Bail.
1465 return {};
1466
1467 // If this single use isn't data invariant, isn't in this block, or has
1468 // interfering EFLAGS, we can't sink the hardening to it.
1469 if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent())
1470 return {};
1471
1472 // If this instruction defines multiple registers bail as we won't harden
1473 // all of them.
1474 if (UseMI.getDesc().getNumDefs() > 1)
1475 return {};
1476
1477 // If this register isn't a virtual register we can't walk uses of sanely,
1478 // just bail. Also check that its register class is one of the ones we
1479 // can harden.
1480 unsigned UseDefReg = UseMI.getOperand(0).getReg();
1481 if (!TRI->isVirtualRegister(UseDefReg) ||
1482 !MRI->getRegClass(UseDefReg)->hasSubClassEq(&X86::GR64RegClass))
1483 return {};
1484
1485 SingleUseMI = &UseMI;
1486 }
1487
1488 // If SingleUseMI is still null, there is no use that needs its own
1489 // checking. Otherwise, it is the single use that needs checking.
1490 return {SingleUseMI};
1491 };
1492
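  // Walk down the chain: starting from the hardened load, keep handing the
  // check off to the single qualifying use. For example (illustrative names,
  // not real output):
  //   %a = MOV64rm ...       ; the load being hardened
  //   %b = ADD64rr %a, %c    ; sole use, data invariant, same block
  // sinks the check from %a to %b, so only %b's value needs poisoning.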
  MachineInstr *MI = &InitialMI;
  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
    // Update which MI we're checking now.
    MI = *SingleUse;
    if (!MI)
      break;
  }

  return MI;
}

// We can harden non-leaking loads into register without touching the address
// by just hiding all of the loaded bits. We use an `or` instruction to do
// this because having the poison value be all ones allows us to use the same
// value below. And the goal is just for the loaded bits to not be exposed to
// execution, so coercing them all to one is sufficient.
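//
// As a sketch (not the exact MIR), an 8-byte load
//   %val = MOV64rm ...
// is rewritten so that the OR defines the original value:
//   %tmp = MOV64rm ...
//   %val = OR64rr %state, %tmp
// and a mis-speculated load therefore yields all ones rather than secret data.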
void X86SpeculativeLoadHardeningPass::hardenPostLoad(
    MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
  assert(isDataInvariantLoad(MI) &&
         "Cannot get here with a non-invariant load!");

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  // For all of these, the def'ed register operand is operand zero.
  auto &DefOp = MI.getOperand(0);
  unsigned OldDefReg = DefOp.getReg();

  auto *DefRC = MRI->getRegClass(OldDefReg);
  int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;

  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
  unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
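  // Log2_32 of the def's byte width (1, 2, 4, or 8) indexes the table above;
  // for example, a 4-byte def gives Log2_32(4) == 2 and selects X86::OR32rr.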

  unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};

  auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
    unsigned StateReg = PredStateSSA.GetValueAtEndOfBlock(&MBB);

    int Bytes = TRI->getRegSizeInBits(RC) / 8;
    // FIXME: Need to teach this about 32-bit mode.
    if (Bytes != 8) {
      unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
      unsigned NarrowStateReg = MRI->createVirtualRegister(&RC);
      BuildMI(MBB, MI.getIterator(), Loc, TII->get(TargetOpcode::COPY),
              NarrowStateReg)
          .addReg(StateReg, 0, SubRegImm);
      StateReg = NarrowStateReg;
    }
    return StateReg;
  };
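  // For example, when called with a 32-bit class this copies the sub_32bit
  // piece of the 64-bit predicate state into a fresh virtual register of that
  // class so the narrower OR or SHRX below can consume it directly.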

  auto InsertPt = std::next(MI.getIterator());
  unsigned FlagsReg = 0;
  bool EFLAGSLive = isEFLAGSLive(MBB, InsertPt, *TRI);
  if (EFLAGSLive && !Subtarget->hasBMI2()) {
    FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
    EFLAGSLive = false;
  }

  if (!EFLAGSLive) {
    unsigned StateReg = GetStateRegInRC(*DefRC);
    unsigned NewDefReg = MRI->createVirtualRegister(DefRC);
    DefOp.setReg(NewDefReg);
    auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), OldDefReg)
                   .addReg(StateReg)
                   .addReg(NewDefReg);
    OrI->addRegisterDead(X86::EFLAGS, TRI);
    ++NumInstsInserted;
    LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
  } else {
    assert(Subtarget->hasBMI2() &&
           "Cannot harden loads and preserve EFLAGS without BMI2!");

    unsigned ShiftOpCode = DefRegBytes < 4 ? X86::SHRX32rr : X86::SHRX64rr;
    auto &ShiftRC =
        DefRegBytes < 4 ? X86::GR32_NOSPRegClass : X86::GR64_NOSPRegClass;
    int ShiftRegBytes = TRI->getRegSizeInBits(ShiftRC) / 8;
    unsigned DefSubRegImm = SubRegImms[Log2_32(DefRegBytes)];

    unsigned StateReg = GetStateRegInRC(ShiftRC);

    // First have the def instruction def a temporary register.
    unsigned TmpReg = MRI->createVirtualRegister(DefRC);
    DefOp.setReg(TmpReg);
    // Now copy it into a register of the shift RC.
    unsigned ShiftInputReg = TmpReg;
    if (DefRegBytes != ShiftRegBytes) {
      unsigned UndefReg = MRI->createVirtualRegister(&ShiftRC);
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::IMPLICIT_DEF), UndefReg);
      ShiftInputReg = MRI->createVirtualRegister(&ShiftRC);
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::INSERT_SUBREG), ShiftInputReg)
          .addReg(UndefReg)
          .addReg(TmpReg)
          .addImm(DefSubRegImm);
    }
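    // For example (illustrative), a 2-byte def is inserted into the low 16
    // bits of a fresh 32-bit register whose remaining bits are undef, so the
    // whole value can be fed to the 32-bit SHRX below.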

    // We shift this once if the shift is wider than the def and thus we can
    // shift *all* of the def'ed bytes out. Otherwise we need to do two shifts.
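    //
    // For example, an all-ones state gives a shift count of 63 (or 31 for the
    // 32-bit shift): a 4-byte def widened into a 64-bit register loses all of
    // its defined bits in one shift, while an 8-byte def still has its old top
    // bit left in bit 0 and needs the second shift below to clear it.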

    unsigned ShiftedReg = MRI->createVirtualRegister(&ShiftRC);
    auto Shift1I =
        BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), ShiftedReg)
            .addReg(ShiftInputReg)
            .addReg(StateReg);
    (void)Shift1I;
    ++NumInstsInserted;
    LLVM_DEBUG(dbgs() << " Inserting shrx: "; Shift1I->dump(); dbgs() << "\n");

    // The only way we have a bit left is if all 8 bytes were defined. Do an
    // extra shift to get the last bit in this case.
    if (DefRegBytes == ShiftRegBytes) {
      // We can just directly def the old def register as it's the same size.
      ShiftInputReg = ShiftedReg;
      auto Shift2I =
          BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), OldDefReg)
              .addReg(ShiftInputReg)
              .addReg(StateReg);
      (void)Shift2I;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << " Inserting shrx: "; Shift2I->dump();
                 dbgs() << "\n");
    } else {
      // When we have a different-size shift register we need to fix up the
      // register class. We can do that as we copy into the old def register.
      BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), OldDefReg)
          .addReg(ShiftedReg, 0, DefSubRegImm);
    }
  }

  if (FlagsReg)
    restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);

  ++NumPostLoadRegsHardened;
}

void X86SpeculativeLoadHardeningPass::checkReturnInstr(
    MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();
  auto InsertPt = MI.getIterator();

  if (FenceCallAndRet) {
    // Simply forcibly block speculation of loads out of the function by using
    // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
    // should be secure, is simple from an ABI perspective, and the cost can be
    // minimized through inlining.
    //
    // FIXME: We should investigate ways to establish a strong data-dependency
    // on the return. However, poisoning the stack pointer is unlikely to work
    // because the return is *predicted* rather than relying on the load of the
    // return address to actually resolve.
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
    return;
  }

  // Take our predicate state, shift it to the high 17 bits (so that we keep
  // pointers canonical) and merge it into RSP. This will allow the caller to
  // extract it when we return (speculatively).
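  //
  // Roughly (the exact sequence lives in mergePredStateIntoSP), this amounts
  // to:
  //   shlq $47, %state
  //   orq  %state, %rsp
  // A zero state leaves RSP untouched; an all-ones state sets its high 17
  // bits, which the caller can smear back into a full 0/-1 predicate value
  // with an arithmetic right shift.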
  mergePredStateIntoSP(MBB, InsertPt, Loc,
                       PredStateSSA.GetValueAtEndOfBlock(&MBB));
}

INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
                      "X86 speculative load hardener", false, false)
INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
                    "X86 speculative load hardener", false, false)

FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
  return new X86SpeculativeLoadHardeningPass();
}