Doxygenize the documentation comments

There were many // comments used to document classes, functions, etc., but
those are not picked up by doxygen, which expects /// comments. This
converts many comments from // to /// in order to improve the generated
documentation.

BUG=
R=jvoung@chromium.org, kschimpf@google.com

Review URL: https://codereview.chromium.org/1216963007.
diff --git a/Doxyfile b/Doxyfile
index 3b19653..3b940f1 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -699,7 +699,7 @@
 # will automatically be disabled.
 # The default value is: YES.
 
-WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_UNDOCUMENTED   = NO
 
 # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
 # potential errors in the documentation, such as not documenting some parameters
diff --git a/src/IceAPInt.h b/src/IceAPInt.h
index 01ce280..9ef0702 100644
--- a/src/IceAPInt.h
+++ b/src/IceAPInt.h
@@ -47,8 +47,8 @@
   uint64_t getRawData() const { return Val; }
 
 private:
-  uint32_t BitWidth; // The number of bits in this APInt.
-  uint64_t Val;      // The (64-bit) equivalent integer value.
+  uint32_t BitWidth; ///< The number of bits in this APInt.
+  uint64_t Val;      ///< The (64-bit) equivalent integer value.
 
   /// Clear unused high order bits.
   void clearUnusedBits() {
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp
index 94cb5a1..48db036 100644
--- a/src/IceAssembler.cpp
+++ b/src/IceAssembler.cpp
@@ -17,9 +17,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler base class.
-//
+///
+/// \file
+/// This file implements the Assembler base class.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceAssembler.h"
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index c2a40d6..437dfc3 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -13,11 +13,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the Assembler base class.  Instructions are assembled
-// by architecture-specific assemblers that derive from this base class.
-// This base class manages buffers and fixups for emitting code, etc.
-//
+///
+/// \file
+/// This file declares the Assembler base class.  Instructions are assembled
+/// by architecture-specific assemblers that derive from this base class.
+/// This base class manages buffers and fixups for emitting code, etc.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLER_H
@@ -28,7 +29,7 @@
 
 namespace Ice {
 
-// Assembler buffers are used to emit binary code. They grow on demand.
+/// Assembler buffers are used to emit binary code. They grow on demand.
 class AssemblerBuffer {
   AssemblerBuffer(const AssemblerBuffer &) = delete;
   AssemblerBuffer &operator=(const AssemblerBuffer &) = delete;
@@ -37,7 +38,7 @@
   AssemblerBuffer(Assembler &);
   ~AssemblerBuffer();
 
-  // Basic support for emitting, loading, and storing.
+  /// Basic support for emitting, loading, and storing.
   template <typename T> void emit(T Value) {
     assert(hasEnsuredCapacity());
     *reinterpret_cast<T *>(Cursor) = Value;
@@ -56,21 +57,20 @@
     *reinterpret_cast<T *>(Contents + Position) = Value;
   }
 
-  // Emit a fixup at the current location.
+  /// Emit a fixup at the current location.
   void emitFixup(AssemblerFixup *Fixup) { Fixup->set_position(size()); }
 
-  // Get the size of the emitted code.
+  /// Get the size of the emitted code.
   intptr_t size() const { return Cursor - Contents; }
   uintptr_t contents() const { return Contents; }
 
-  // To emit an instruction to the assembler buffer, the EnsureCapacity helper
-  // must be used to guarantee that the underlying data area is big enough to
-  // hold the emitted instruction. Usage:
-  //
-  //     AssemblerBuffer buffer;
-  //     AssemblerBuffer::EnsureCapacity ensured(&buffer);
-  //     ... emit bytes for single instruction ...
-
+  /// To emit an instruction to the assembler buffer, the EnsureCapacity helper
+  /// must be used to guarantee that the underlying data area is big enough to
+  /// hold the emitted instruction. Usage:
+  ///
+  ///     AssemblerBuffer buffer;
+  ///     AssemblerBuffer::EnsureCapacity ensured(&buffer);
+  ///     ... emit bytes for single instruction ...
   class EnsureCapacity {
     EnsureCapacity(const EnsureCapacity &) = delete;
     EnsureCapacity &operator=(const EnsureCapacity &) = delete;
@@ -100,10 +100,10 @@
     return true;
   }
 
-  // Returns the position in the instruction stream.
+  /// Returns the position in the instruction stream.
   intptr_t getPosition() const { return Cursor - Contents; }
 
-  // Create and track a fixup in the current function.
+  /// Create and track a fixup in the current function.
   AssemblerFixup *createFixup(FixupKind Kind, const Constant *Value);
 
   const FixupRefList &fixups() const { return Fixups; }
@@ -114,9 +114,9 @@
   }
 
 private:
-  // The limit is set to kMinimumGap bytes before the end of the data area.
-  // This leaves enough space for the longest possible instruction and allows
-  // for a single, fast space check per instruction.
+  /// The limit is set to kMinimumGap bytes before the end of the data area.
+  /// This leaves enough space for the longest possible instruction and allows
+  /// for a single, fast space check per instruction.
   static constexpr intptr_t kMinimumGap = 32;
 
   uintptr_t Contents;
@@ -124,7 +124,7 @@
   uintptr_t Limit;
   // The member variable is named Assemblr to avoid hiding the class Assembler.
   Assembler &Assemblr;
-  // List of pool-allocated fixups relative to the current function.
+  /// List of pool-allocated fixups relative to the current function.
   FixupRefList Fixups;
 
   uintptr_t cursor() const { return Cursor; }
@@ -134,8 +134,8 @@
     return (Limit - Contents) + kMinimumGap;
   }
 
-  // Compute the limit based on the data area and the capacity. See
-  // description of kMinimumGap for the reasoning behind the value.
+  /// Compute the limit based on the data area and the capacity. See
+  /// description of kMinimumGap for the reasoning behind the value.
   static uintptr_t computeLimit(uintptr_t Data, intptr_t Capacity) {
     return Data + Capacity - kMinimumGap;
   }
@@ -158,7 +158,7 @@
 
   virtual ~Assembler() = default;
 
-  // Allocate a chunk of bytes using the per-Assembler allocator.
+  /// Allocate a chunk of bytes using the per-Assembler allocator.
   uintptr_t allocateBytes(size_t bytes) {
     // For now, alignment is not related to NaCl bundle alignment, since
     // the buffer's GetPosition is relative to the base. So NaCl bundle
@@ -170,13 +170,13 @@
     return reinterpret_cast<uintptr_t>(Allocator.Allocate(bytes, Alignment));
   }
 
-  // Allocate data of type T using the per-Assembler allocator.
+  /// Allocate data of type T using the per-Assembler allocator.
   template <typename T> T *allocate() { return Allocator.Allocate<T>(); }
 
-  // Align the tail end of the function to the required target alignment.
+  /// Align the tail end of the function to the required target alignment.
   virtual void alignFunction() = 0;
 
-  // Add nop padding of a particular width to the current bundle.
+  /// Add nop padding of a particular width to the current bundle.
   virtual void padWithNop(intptr_t Padding) = 0;
 
   virtual SizeT getBundleAlignLog2Bytes() const = 0;
@@ -184,8 +184,8 @@
   virtual const char *getNonExecPadDirective() const = 0;
   virtual llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const = 0;
 
-  // Mark the current text location as the start of a CFG node
-  // (represented by NodeNumber).
+  /// Mark the current text location as the start of a CFG node
+  /// (represented by NodeNumber).
   virtual void bindCfgNodeLabel(SizeT NodeNumber) = 0;
 
   virtual bool fixupIsPCRel(FixupKind Kind) const = 0;
@@ -205,7 +205,7 @@
   const IceString &getFunctionName() { return FunctionName; }
   void setFunctionName(const IceString &NewName) { FunctionName = NewName; }
   intptr_t getBufferSize() const { return Buffer.size(); }
-  // Roll back to a (smaller) size.
+  /// Roll back to a (smaller) size.
   void setBufferSize(intptr_t NewSize) { Buffer.setSize(NewSize); }
   void setPreliminary(bool Value) { Preliminary = Value; }
   bool getPreliminary() const { return Preliminary; }
@@ -220,15 +220,15 @@
   const AssemblerKind Kind;
 
   ArenaAllocator<32 * 1024> Allocator;
-  // FunctionName and IsInternal are transferred from the original Cfg
-  // object, since the Cfg object may be deleted by the time the
-  // assembler buffer is emitted.
+  /// FunctionName and IsInternal are transferred from the original Cfg
+  /// object, since the Cfg object may be deleted by the time the
+  /// assembler buffer is emitted.
   IceString FunctionName = "";
   bool IsInternal = false;
-  // Preliminary indicates whether a preliminary pass is being made
-  // for calculating bundle padding (Preliminary=true), versus the
-  // final pass where all changes to label bindings, label links, and
-  // relocation fixups are fully committed (Preliminary=false).
+  /// Preliminary indicates whether a preliminary pass is being made
+  /// for calculating bundle padding (Preliminary=true), versus the
+  /// final pass where all changes to label bindings, label links, and
+  /// relocation fixups are fully committed (Preliminary=false).
   bool Preliminary = false;
 
 protected:
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h
index f67b630..977b43d 100644
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -14,9 +14,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for ARM32.
-//
+///
+/// \file
+/// This file implements the Assembler class for ARM32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERARM32_H
diff --git a/src/IceAssemblerMIPS32.h b/src/IceAssemblerMIPS32.h
index fc83919..b088d56 100644
--- a/src/IceAssemblerMIPS32.h
+++ b/src/IceAssemblerMIPS32.h
@@ -14,9 +14,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for MIPS32.
-//
+///
+/// \file
+/// This file implements the Assembler class for MIPS32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERMIPS32_H
diff --git a/src/IceAssemblerX8632.cpp b/src/IceAssemblerX8632.cpp
new file mode 100644
index 0000000..6135b5c
--- /dev/null
+++ b/src/IceAssemblerX8632.cpp
@@ -0,0 +1,2559 @@
+//===- subzero/src/IceAssemblerX8632.cpp - Assembler for x86-32  ----------===//
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+//
+// Modified by the Subzero authors.
+//
+//===----------------------------------------------------------------------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the Assembler class for x86-32.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IceAssemblerX8632.h"
+
+#include "IceCfg.h"
+#include "IceOperand.h"
+
+namespace Ice {
+namespace X8632 {
+
+Address Address::ofConstPool(Assembler *Asm, const Constant *Imm) {
+  AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
+  const RelocOffsetT Offset = 0;
+  return Address::Absolute(Offset, Fixup);
+}
+
+AssemblerX8632::~AssemblerX8632() {
+  if (BuildDefs::asserts()) {
+    for (const Label *Label : CfgNodeLabels) {
+      Label->FinalCheck();
+    }
+    for (const Label *Label : LocalLabels) {
+      Label->FinalCheck();
+    }
+  }
+}
+
+void AssemblerX8632::alignFunction() {
+  SizeT Align = 1 << getBundleAlignLog2Bytes();
+  SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
+  const SizeT HltSize = 1;
+  while (BytesNeeded > 0) {
+    hlt();
+    BytesNeeded -= HltSize;
+  }
+}
+
+Label *AssemblerX8632::GetOrCreateLabel(SizeT Number, LabelVector &Labels) {
+  Label *L = nullptr;
+  if (Number == Labels.size()) {
+    L = new (this->allocate<Label>()) Label();
+    Labels.push_back(L);
+    return L;
+  }
+  if (Number > Labels.size()) {
+    Labels.resize(Number + 1);
+  }
+  L = Labels[Number];
+  if (!L) {
+    L = new (this->allocate<Label>()) Label();
+    Labels[Number] = L;
+  }
+  return L;
+}
+
+Label *AssemblerX8632::GetOrCreateCfgNodeLabel(SizeT NodeNumber) {
+  return GetOrCreateLabel(NodeNumber, CfgNodeLabels);
+}
+
+Label *AssemblerX8632::GetOrCreateLocalLabel(SizeT Number) {
+  return GetOrCreateLabel(Number, LocalLabels);
+}
+
+void AssemblerX8632::bindCfgNodeLabel(SizeT NodeNumber) {
+  assert(!getPreliminary());
+  Label *L = GetOrCreateCfgNodeLabel(NodeNumber);
+  this->bind(L);
+}
+
+void AssemblerX8632::BindLocalLabel(SizeT Number) {
+  Label *L = GetOrCreateLocalLabel(Number);
+  if (!getPreliminary())
+    this->bind(L);
+}
+
+void AssemblerX8632::call(GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);
+  emitRegisterOperand(2, reg);
+}
+
+void AssemblerX8632::call(const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);
+  emitOperand(2, address);
+}
+
+void AssemblerX8632::call(const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  intptr_t call_start = Buffer.getPosition();
+  emitUint8(0xE8);
+  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label));
+  emitInt32(-4);
+  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
+  (void)call_start;
+}
+
+void AssemblerX8632::call(const Immediate &abs_address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  intptr_t call_start = Buffer.getPosition();
+  emitUint8(0xE8);
+  emitFixup(
+      this->createFixup(llvm::ELF::R_386_PC32, AssemblerFixup::NullSymbol));
+  emitInt32(abs_address.value() - 4);
+  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
+  (void)call_start;
+}
+
+void AssemblerX8632::pushl(GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x50 + reg);
+}
+
+void AssemblerX8632::popl(GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x58 + reg);
+}
+
+void AssemblerX8632::popl(const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x8F);
+  emitOperand(0, address);
+}
+
+void AssemblerX8632::pushal() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x60);
+}
+
+void AssemblerX8632::popal() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x61);
+}
+
+void AssemblerX8632::setcc(CondX86::BrCond condition, ByteRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x90 + condition);
+  emitUint8(0xC0 + dst);
+}
+
+void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x90 + condition);
+  emitOperand(0, address);
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isByteSizedType(Ty)) {
+    emitUint8(0xB0 + dst);
+    emitUint8(imm.value() & 0xFF);
+    return;
+  }
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0xB8 + dst);
+  emitImmediate(Ty, imm);
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x88);
+  } else {
+    emitUint8(0x89);
+  }
+  emitRegisterOperand(src, dst);
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x8A);
+  } else {
+    emitUint8(0x8B);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::mov(Type Ty, const Address &dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x88);
+  } else {
+    emitUint8(0x89);
+  }
+  emitOperand(src, dst);
+}
+
+void AssemblerX8632::mov(Type Ty, const Address &dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitUint8(0xC6);
+    emitOperand(0, dst);
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    emitUint8(0xC7);
+    emitOperand(0, dst);
+    emitImmediate(Ty, imm);
+  }
+}
+
+void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xB6 : 0xB7);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xB6 : 0xB7);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xBE : 0xBF);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xBE : 0xBF);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::lea(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x8D);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
+                          GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  else
+    assert(Ty == IceType_i32);
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
+                          const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  else
+    assert(Ty == IceType_i32);
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::rep_movsb() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0xA4);
+}
+
+void AssemblerX8632::movss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x10);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::movss(Type Ty, const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x11);
+  emitOperand(src, dst);
+}
+
+void AssemblerX8632::movss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x11);
+  emitXmmRegisterOperand(src, dst);
+}
+
+void AssemblerX8632::movd(XmmRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x6E);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movd(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x6E);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::movd(GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x7E);
+  emitRegisterOperand(src, dst);
+}
+
+void AssemblerX8632::movd(const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x7E);
+  emitOperand(src, dst);
+}
+
+void AssemblerX8632::movq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0x0F);
+  emitUint8(0x7E);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movq(const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xD6);
+  emitOperand(src, dst);
+}
+
+void AssemblerX8632::movq(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0x0F);
+  emitUint8(0x7E);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::addss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x58);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::addss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x58);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::subss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5C);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::subss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5C);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::mulss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x59);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::mulss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x59);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::divss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5E);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::divss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5E);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::fld(Type Ty, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
+  emitOperand(0, src);
+}
+
+void AssemblerX8632::fstp(Type Ty, const Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
+  emitOperand(3, dst);
+}
+
+void AssemblerX8632::fstp(X87STRegister st) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDD);
+  emitUint8(0xD8 + st);
+}
+
+void AssemblerX8632::movaps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x28);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movups(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x10);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movups(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x10);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::movups(const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x11);
+  emitOperand(src, dst);
+}
+
+void AssemblerX8632::padd(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xFC);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xFD);
+  } else {
+    emitUint8(0xFE);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::padd(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xFC);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xFD);
+  } else {
+    emitUint8(0xFE);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDB);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDB);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDF);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDF);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::pmull(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD5);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38);
+    emitUint8(0x40);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmull(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD5);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38);
+    emitUint8(0x40);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xF4);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst,
+                             const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xF4);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEB);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEB);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::psub(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xF8);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xF9);
+  } else {
+    emitUint8(0xFA);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psub(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xF8);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xF9);
+  } else {
+    emitUint8(0xFA);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEF);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEF);
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xF1);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xF1);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x72);
+  }
+  emitRegisterOperand(6, dst);
+  emitUint8(imm.value() & 0xFF);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xE1);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xE1);
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2);
+  }
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // Only the low byte of the count is emitted below.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71); // Immediate-count shift group, 16-bit lanes.
+  } else {
+    assert(Ty == IceType_i32); // Any other type trips the assert.
+    emitUint8(0x72); // Immediate-count shift group, 32-bit lanes.
+  }
+  emitRegisterOperand(4, dst); // Opcode extension /4 selects arithmetic right.
+  emitUint8(imm.value() & 0xFF); // imm8 shift count.
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD1); // PSRLW: logical shift right, 16-bit lanes.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0xD3); // PSRLQ: logical shift right, 64-bit lanes.
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2); // PSRLD: logical shift right, 32-bit lanes.
+  }
+  emitXmmRegisterOperand(dst, src); // Register form: the count is in src.
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD1); // PSRLW: logical shift right, 16-bit lanes.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0xD3); // PSRLQ: logical shift right, 64-bit lanes.
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2); // PSRLD: logical shift right, 32-bit lanes.
+  }
+  emitOperand(dst, src); // Memory form: the count is read from src.
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // Only the low byte of the count is emitted below.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71); // Immediate-count shift group, 16-bit lanes.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0x73); // Immediate-count shift group, 64-bit lanes.
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0x72); // Immediate-count shift group, 32-bit lanes.
+  }
+  emitRegisterOperand(2, dst); // Opcode extension /2 selects logical right.
+  emitUint8(imm.value() & 0xFF); // imm8 shift count.
+}
+
+// {add,sub,mul,div}ps are given a Ty parameter for consistency with
+// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows
+// addpd, etc., we can use the Ty parameter to decide on adding
+// a 0x66 prefix.
+void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x58); // 0F 58 /r: ADDPS, register source.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x58); // 0F 58 /r: ADDPS, memory source. Ty is ignored.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5C); // 0F 5C /r: SUBPS, register source. Ty is ignored.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5C); // 0F 5C /r: SUBPS, memory source. Ty is ignored.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5E); // 0F 5E /r: DIVPS, register source. Ty is ignored.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5E); // 0F 5E /r: DIVPS, memory source. Ty is ignored.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x59); // 0F 59 /r: MULPS, register source. Ty is ignored.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x59); // 0F 59 /r: MULPS, memory source. Ty is ignored.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::minps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5D); // 0F 5D /r: MINPS (packed single minimum).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::maxps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5F); // 0F 5F /r: MAXPS (packed single maximum).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::andps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x54); // 0F 54 /r: ANDPS (bitwise AND), register source.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::andps(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x54); // 0F 54 /r: ANDPS (bitwise AND), memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::orps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x56); // 0F 56 /r: ORPS (bitwise OR).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38); // 0F 38 selects the three-byte opcode map.
+  emitUint8(0x14); // 66 0F 38 14 /r: BLENDVPS; the mask is implicitly XMM0.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38); // 0F 38 selects the three-byte opcode map.
+  emitUint8(0x14); // 66 0F 38 14 /r: BLENDVPS; the mask is implicitly XMM0.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38); // 0F 38 selects the three-byte opcode map.
+  emitUint8(0x10); // 66 0F 38 10 /r: PBLENDVB; the mask is implicitly XMM0.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38); // 0F 38 selects the three-byte opcode map.
+  emitUint8(0x10); // 66 0F 38 10 /r: PBLENDVB; the mask is implicitly XMM0.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::cmpps(XmmRegister dst, XmmRegister src,
+                           CondX86::CmppsCond CmpCondition) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xC2); // 0F C2 /r ib: CMPPS.
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(CmpCondition); // Trailing imm8 carries the comparison predicate.
+}
+
+void AssemblerX8632::cmpps(XmmRegister dst, const Address &src,
+                           CondX86::CmppsCond CmpCondition) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xC2); // 0F C2 /r ib: CMPPS, memory source.
+  emitOperand(dst, src);
+  emitUint8(CmpCondition); // Trailing imm8 carries the comparison predicate.
+}
+
+void AssemblerX8632::sqrtps(XmmRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x51); // 0F 51 /r: SQRTPS.
+  emitXmmRegisterOperand(dst, dst); // dst is both source and destination.
+}
+
+void AssemblerX8632::rsqrtps(XmmRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x52); // 0F 52 /r: RSQRTPS (approximate reciprocal square root).
+  emitXmmRegisterOperand(dst, dst); // dst is both source and destination.
+}
+
+void AssemblerX8632::reciprocalps(XmmRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x53); // 0F 53 /r: RCPPS (approximate reciprocal).
+  emitXmmRegisterOperand(dst, dst); // dst is both source and destination.
+}
+
+void AssemblerX8632::movhlps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x12); // 0F 12 /r (register form): MOVHLPS.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movlhps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x16); // 0F 16 /r (register form): MOVLHPS.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::unpcklps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x14); // 0F 14 /r: UNPCKLPS (interleave low singles).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::unpckhps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x15); // 0F 15 /r: UNPCKHPS (interleave high singles).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::unpcklpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x14); // 66 0F 14 /r: UNPCKLPD (interleave low doubles).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::unpckhpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x15); // 66 0F 15 /r: UNPCKHPD (interleave high doubles).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::set1ps(XmmRegister dst, GPRRegister tmp1,
+                            const Immediate &imm) {
+  // Load 32-bit immediate value into tmp1 (tmp1 is overwritten).
+  mov(IceType_i32, tmp1, imm);
+  // Move value from tmp1 into dst.
+  movd(dst, tmp1);
+  // Broadcast low lane into other three lanes: every 2-bit selector is 0.
+  shufps(dst, dst, Immediate(0x0));
+}
+
+void AssemblerX8632::shufps(XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xC6); // 0F C6 /r ib: SHUFPS.
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x70); // 66 0F 70 /r ib: PSHUFD. Ty is ignored.
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x70); // 66 0F 70 /r ib: PSHUFD, memory source. Ty is ignored.
+  emitOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xC6); // 0F C6 /r ib: SHUFPS. Ty is ignored.
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xC6); // 0F C6 /r ib: SHUFPS, memory source. Ty is ignored.
+  emitOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::minpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x5D); // 66 0F 5D /r: MINPD.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::maxpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x5F); // 66 0F 5F /r: MAXPD.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::sqrtpd(XmmRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x51); // 66 0F 51 /r: SQRTPD.
+  emitXmmRegisterOperand(dst, dst); // dst is both source and destination.
+}
+
+void AssemblerX8632::shufpd(XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0xC6); // 66 0F C6 /r ib: SHUFPD.
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8()); // The lane selector must fit the single imm8 byte.
+  emitUint8(imm.value()); // imm8 lane selector.
+}
+
+void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5B); // 0F 5B /r: CVTDQ2PS (packed int32 -> packed single).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5B); // 0F 5B /r: CVTDQ2PS, memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3); // F3 prefix distinguishes this from CVTDQ2PS (0F 5B).
+  emitUint8(0x0F);
+  emitUint8(0x5B); // F3 0F 5B /r: CVTTPS2DQ (truncating single -> int32).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3); // F3 prefix distinguishes this from CVTDQ2PS (0F 5B).
+  emitUint8(0x0F);
+  emitUint8(0x5B); // F3 0F 5B /r: CVTTPS2DQ, memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2A); // F3 0F 2A: CVTSI2SS; F2 0F 2A: CVTSI2SD.
+  emitRegisterOperand(dst, src); // The source is a general-purpose register.
+}
+
+void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2A); // F3 0F 2A: CVTSI2SS; F2 0F 2A: CVTSI2SD.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // ss2sd or sd2ss, chosen by the source type.
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5A); // F3 0F 5A: CVTSS2SD; F2 0F 5A: CVTSD2SS.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5A); // F3 0F 5A: CVTSS2SD; F2 0F 5A: CVTSD2SS.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::cvttss2si(Type SrcTy, GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2C); // F3 0F 2C: CVTTSS2SI; F2 0F 2C: CVTTSD2SI.
+  emitXmmRegisterOperand(dst, src); // The destination is a GPR.
+}
+
+void AssemblerX8632::cvttss2si(Type SrcTy, GPRRegister dst,
+                               const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2C); // F3 0F 2C: CVTTSS2SI; F2 0F 2C: CVTTSD2SI.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_f64)
+    emitUint8(0x66); // With the prefix the encoding is UCOMISD.
+  emitUint8(0x0F);
+  emitUint8(0x2E); // 0F 2E /r: UCOMISS (unordered scalar compare).
+  emitXmmRegisterOperand(a, b);
+}
+
+void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, const Address &b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_f64)
+    emitUint8(0x66); // With the prefix the encoding is UCOMISD.
+  emitUint8(0x0F);
+  emitUint8(0x2E); // 0F 2E /r: UCOMISS (unordered scalar compare).
+  emitOperand(a, b); // Memory form for the second operand.
+}
+
+void AssemblerX8632::movmskpd(GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x50); // 66 0F 50 /r: MOVMSKPD (sign bits of doubles -> GPR).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movmskps(GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x50); // 0F 50 /r: MOVMSKPS (sign bits of singles -> GPR).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::sqrtss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x51); // F3 0F 51: SQRTSS; F2 0F 51: SQRTSD.
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::sqrtss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x51); // F3 0F 51: SQRTSS; F2 0F 51: SQRTSD.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::xorpd(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x57); // 66 0F 57 /r: XORPD, memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::xorpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x57); // 66 0F 57 /r: XORPD, register source.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::orpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x56); // 66 0F 56 /r: ORPD.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::xorps(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x57); // 0F 57 /r: XORPS, memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::xorps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x57); // 0F 57 /r: XORPS, register source.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::andpd(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x54); // 66 0F 54 /r: ANDPD, memory source.
+  emitOperand(dst, src);
+}
+
+void AssemblerX8632::andpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // The 0x66 prefix selects the packed-double variant.
+  emitUint8(0x0F);
+  emitUint8(0x54); // 66 0F 54 /r: ANDPD, register source.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                              const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // The control value is emitted as one byte below.
+  assert(isVectorFloatingType(Ty));
+  (void)Ty; // Ty is only read by the assert above.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A); // 0F 3A selects the three-byte map that takes an imm8.
+  emitUint8(0x21); // 66 0F 3A 21 /r ib: INSERTPS.
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(imm.value()); // imm8 control byte.
+}
+
+void AssemblerX8632::insertps(Type Ty, XmmRegister dst, const Address &src,
+                              const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // The control value is emitted as one byte below.
+  assert(isVectorFloatingType(Ty));
+  (void)Ty; // Ty is only read by the assert above.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A); // 0F 3A selects the three-byte map that takes an imm8.
+  emitUint8(0x21); // 66 0F 3A 21 /r ib: INSERTPS, memory source.
+  emitOperand(dst, src);
+  emitUint8(imm.value()); // imm8 control byte.
+}
+
+void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, GPRRegister src,
+                           const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // The lane index is emitted as one byte below.
+  if (Ty == IceType_i16) {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC4); // 66 0F C4 /r ib: PINSRW (two-byte opcode map).
+    emitXmmRegisterOperand(dst, XmmRegister(src)); // GPR number cast for rm.
+    emitUint8(imm.value()); // imm8 lane index.
+  } else {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A); // 0F 3A three-byte opcode map.
+    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22); // 20: PINSRB; 22: PINSRD.
+    emitXmmRegisterOperand(dst, XmmRegister(src)); // GPR number cast for rm.
+    emitUint8(imm.value()); // imm8 lane index.
+  }
+}
+
+void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, const Address &src,
+                           const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // The lane index is emitted as one byte below.
+  if (Ty == IceType_i16) {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC4); // 66 0F C4 /r ib: PINSRW (two-byte opcode map).
+    emitOperand(dst, src); // Memory-source form.
+    emitUint8(imm.value()); // imm8 lane index.
+  } else {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A); // 0F 3A three-byte opcode map.
+    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22); // 20: PINSRB; 22: PINSRD.
+    emitOperand(dst, src); // Memory-source form.
+    emitUint8(imm.value()); // imm8 lane index.
+  }
+}
+
+void AssemblerX8632::pextr(Type Ty, GPRRegister dst, XmmRegister src,
+                           const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // The lane index is emitted as one byte below.
+  if (Ty == IceType_i16) {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC5); // 66 0F C5 /r ib: PEXTRW (SSE2 form).
+    emitXmmRegisterOperand(XmmRegister(dst), src); // RMI: dst in reg field.
+    emitUint8(imm.value()); // imm8 lane index.
+  } else {
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A); // 0F 3A three-byte opcode map.
+    emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16); // 14: PEXTRB; 16: PEXTRD.
+    // SSE 4.1 versions are "MRI" because dst can be mem, while
+    // pextrw (SSE2) is RMI because dst must be reg.
+    emitXmmRegisterOperand(src, XmmRegister(dst)); // MRI: operands swapped.
+    emitUint8(imm.value()); // imm8 lane index.
+  }
+}
+
+void AssemblerX8632::pmovsxdq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38); // 0F 38 selects the three-byte opcode map.
+  emitUint8(0x25); // 66 0F 38 25 /r: PMOVSXDQ (sign-extend int32 -> int64).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0x74); // PCMPEQB: 8-bit lanes.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x75); // PCMPEQW: 16-bit lanes.
+  } else {
+    emitUint8(0x76); // PCMPEQD: fallback for every other type (no assert).
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0x74); // PCMPEQB: 8-bit lanes.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x75); // PCMPEQW: 16-bit lanes.
+  } else {
+    emitUint8(0x76); // PCMPEQD: fallback for every other type (no assert).
+  }
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0x64); // PCMPGTB: signed compare, 8-bit lanes.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x65); // PCMPGTW: signed compare, 16-bit lanes.
+  } else {
+    emitUint8(0x66); // PCMPGTD: here 0x66 is the opcode byte, not a prefix.
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66); // 66 0F precedes the opcode for every element width.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0x64); // PCMPGTB: signed compare, 8-bit lanes.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x65); // PCMPGTW: signed compare, 16-bit lanes.
+  } else {
+    emitUint8(0x66); // PCMPGTD: here 0x66 is the opcode byte, not a prefix.
+  }
+  emitOperand(dst, src); // Memory-source form.
+}
+
+void AssemblerX8632::roundsd(XmmRegister dst, XmmRegister src,
+                             RoundingMode mode) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A); // 0F 3A selects the three-byte map that takes an imm8.
+  emitUint8(0x0B); // 66 0F 3A 0B /r ib: ROUNDSD.
+  emitXmmRegisterOperand(dst, src);
+  // Mask precision exception: bit 3 (0x8) is OR-ed into the mode byte.
+  emitUint8(static_cast<uint8_t>(mode) | 0x8);
+}
+
+void AssemblerX8632::fnstcw(const Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitOperand(7, dst); // D9 /7: FNSTCW (store the x87 control word to dst).
+}
+
+void AssemblerX8632::fldcw(const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitOperand(5, src); // D9 /5: FLDCW (load the x87 control word from src).
+}
+
+void AssemblerX8632::fistpl(const Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDF);
+  emitOperand(7, dst); // DF /7: FISTP with a 64-bit integer memory operand.
+}
+
+void AssemblerX8632::fistps(const Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDB);
+  emitOperand(3, dst); // DB /3: FISTP with a 32-bit integer memory operand.
+}
+
+void AssemblerX8632::fildl(const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDF);
+  emitOperand(5, src); // DF /5: FILD with a 64-bit integer memory operand.
+}
+
+void AssemblerX8632::filds(const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDB);
+  emitOperand(0, src); // DB /0: FILD with a 32-bit integer memory operand.
+}
+
+void AssemblerX8632::fincstp() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitUint8(0xF7); // D9 F7: FINCSTP (increment the x87 stack-top pointer).
+}
+
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg, const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isByteSizedType(Ty)) {
+    emitComplexI8(Tag, Operand(reg), imm); // Byte-sized path, no size prefix.
+    return;
+  }
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  emitComplex(Ty, Tag, Operand(reg), imm); // Tag picks the ALU operation.
+}
+
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedType(Ty))
+    emitUint8(Tag * 8 + 2); // Opcode row Tag, "r8, r/m8" column.
+  else
+    emitUint8(Tag * 8 + 3); // Opcode row Tag, "r16/32, r/m16/32" column.
+  emitRegisterOperand(reg0, reg1);
+}
+
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg,
+                               const Address &address) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedType(Ty))
+    emitUint8(Tag * 8 + 2); // Opcode row Tag, "r8, r/m8" column.
+  else
+    emitUint8(Tag * 8 + 3); // Opcode row Tag, "r16/32, r/m16/32" column.
+  emitOperand(reg, address); // The register is the destination here.
+}
+
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, const Address &address,
+                               GPRRegister reg) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedType(Ty))
+    emitUint8(Tag * 8 + 0); // Opcode row Tag, "r/m8, r8" column.
+  else
+    emitUint8(Tag * 8 + 1); // Opcode row Tag, "r/m16/32, r16/32" column.
+  emitOperand(reg, address); // The memory operand is the destination here.
+}
+
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, const Address &address,
+                               const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isByteSizedType(Ty)) {
+    emitComplexI8(Tag, address, imm); // Byte-sized path, no size prefix.
+    return;
+  }
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  emitComplex(Ty, Tag, address, imm); // Tag picks the ALU operation.
+}
+
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Immediate &imm) {
+  arith_int<7>(Ty, reg, imm); // Tag 7 is CMP; register/immediate form.
+}
+
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg0, GPRRegister reg1) {
+  arith_int<7>(Ty, reg0, reg1); // Tag 7 is CMP; register/register form.
+}
+
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Address &address) {
+  arith_int<7>(Ty, reg, address); // Tag 7 is CMP; register/memory form.
+}
+
+void AssemblerX8632::cmp(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<7>(Ty, address, reg); // Tag 7 is CMP; memory/register form.
+}
+
+void AssemblerX8632::cmp(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<7>(Ty, address, imm); // Tag 7 is CMP; memory/immediate form.
+}
+
+void AssemblerX8632::test(Type Ty, GPRRegister reg1, GPRRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedType(Ty))
+    emitUint8(0x84); // 84 /r: TEST r/m8, r8.
+  else
+    emitUint8(0x85); // 85 /r: TEST r/m16/32, r16/32.
+  emitRegisterOperand(reg1, reg2);
+}
+
+void AssemblerX8632::test(Type Ty, const Address &addr, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedType(Ty))
+    emitUint8(0x84); // 84 /r: TEST r/m8, r8.
+  else
+    emitUint8(0x85); // 85 /r: TEST r/m16/32, r16/32.
+  emitOperand(reg, addr); // Memory operand form.
+}
+
+void AssemblerX8632::test(Type Ty, GPRRegister reg,
+                          const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
+  // we only test the byte register to keep the encoding short.
+  // This is legal even if the register had high bits set since
+  // this only sets flags registers based on the "AND" of the two operands,
+  // and the immediate had zeros at those high bits.
+  if (immediate.is_uint8() && reg < 4) {
+    // Use zero-extended 8-bit immediate.
+    if (reg == RegX8632::Encoded_Reg_eax) {
+      emitUint8(0xA8); // A8 ib: TEST AL, imm8 (no ModRM byte).
+    } else {
+      emitUint8(0xF6); // F6 /0 ib: TEST r/m8, imm8.
+      emitUint8(0xC0 + reg); // ModRM built by hand: mod=11, /0, rm=reg.
+    }
+    emitUint8(immediate.value() & 0xFF); // imm8.
+  } else if (reg == RegX8632::Encoded_Reg_eax) {
+    // Use short form if the destination is EAX.
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+    emitUint8(0xA9); // A9: TEST EAX/AX, imm (no ModRM byte).
+    emitImmediate(Ty, immediate);
+  } else {
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+    emitUint8(0xF7); // F7 /0: TEST r/m16/32, imm.
+    emitRegisterOperand(0, reg); // /0 opcode extension.
+    emitImmediate(Ty, immediate);
+  }
+}
+
+void AssemblerX8632::test(Type Ty, const Address &addr,
+                          const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // If the immediate is short, we only test the byte addr to keep the
+  // encoding short.
+  if (immediate.is_uint8()) {
+    // Use zero-extended 8-bit immediate.
+    emitUint8(0xF6); // F6 /0 ib: TEST r/m8, imm8; Ty is not consulted here.
+    emitOperand(0, addr); // /0 opcode extension.
+    emitUint8(immediate.value() & 0xFF); // imm8.
+  } else {
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+    emitUint8(0xF7); // F7 /0: TEST r/m16/32, imm.
+    emitOperand(0, addr); // /0 opcode extension.
+    emitImmediate(Ty, immediate);
+  }
+}
+
+void AssemblerX8632::And(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<4>(Ty, dst, src); // Tag 4 is AND; register/register form.
+}
+
+void AssemblerX8632::And(Type Ty, GPRRegister dst, const Address &address) {
+  arith_int<4>(Ty, dst, address); // Tag 4 is AND; register/memory form.
+}
+
+void AssemblerX8632::And(Type Ty, GPRRegister dst, const Immediate &imm) {
+  arith_int<4>(Ty, dst, imm); // Tag 4 is AND; register/immediate form.
+}
+
+void AssemblerX8632::And(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<4>(Ty, address, reg); // Tag 4 is AND; memory/register form.
+}
+
+void AssemblerX8632::And(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<4>(Ty, address, imm); // Tag 4 is AND; memory/immediate form.
+}
+
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<1>(Ty, dst, src); // Tag 1 is OR; register/register form.
+}
+
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Address &address) {
+  arith_int<1>(Ty, dst, address); // Tag 1 is OR; register/memory form.
+}
+
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Immediate &imm) {
+  arith_int<1>(Ty, dst, imm); // Tag 1 is OR; register/immediate form.
+}
+
+void AssemblerX8632::Or(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<1>(Ty, address, reg); // Tag 1 is OR; memory/register form.
+}
+
+void AssemblerX8632::Or(Type Ty, const Address &address, const Immediate &imm) {
+  arith_int<1>(Ty, address, imm); // Tag 1 is OR; memory/immediate form.
+}
+
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<6>(Ty, dst, src); // Tag 6 is XOR; register/register form.
+}
+
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Address &address) {
+  arith_int<6>(Ty, dst, address); // Tag 6 is XOR; register/memory form.
+}
+
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Immediate &imm) {
+  arith_int<6>(Ty, dst, imm); // Tag 6 is XOR; register/immediate form.
+}
+
+void AssemblerX8632::Xor(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<6>(Ty, address, reg); // Tag 6 is XOR; memory/register form.
+}
+
+void AssemblerX8632::Xor(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<6>(Ty, address, imm); // Tag 6 is XOR; memory/immediate form.
+}
+
+void AssemblerX8632::add(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<0>(Ty, dst, src); // Tag 0 is ADD; register/register form.
+}
+
+void AssemblerX8632::add(Type Ty, GPRRegister reg, const Address &address) {
+  arith_int<0>(Ty, reg, address); // Tag 0 is ADD; register/memory form.
+}
+
+void AssemblerX8632::add(Type Ty, GPRRegister reg, const Immediate &imm) {
+  arith_int<0>(Ty, reg, imm); // Tag 0 is ADD; register/immediate form.
+}
+
+void AssemblerX8632::add(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<0>(Ty, address, reg); // Tag 0 is ADD; memory/register form.
+}
+
+void AssemblerX8632::add(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<0>(Ty, address, imm); // Tag 0 is ADD; memory/immediate form.
+}
+
+void AssemblerX8632::adc(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<2>(Ty, dst, src); // Tag 2 is ADC; register/register form.
+}
+
+void AssemblerX8632::adc(Type Ty, GPRRegister dst, const Address &address) {
+  arith_int<2>(Ty, dst, address); // Tag 2 is ADC; register/memory form.
+}
+
+void AssemblerX8632::adc(Type Ty, GPRRegister reg, const Immediate &imm) {
+  arith_int<2>(Ty, reg, imm); // Tag 2 is ADC; register/immediate form.
+}
+
+void AssemblerX8632::adc(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<2>(Ty, address, reg); // Tag 2 is ADC; memory/register form.
+}
+
+void AssemblerX8632::adc(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<2>(Ty, address, imm); // Tag 2 is ADC; memory/immediate form.
+}
+
+void AssemblerX8632::sub(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<5>(Ty, dst, src); // Tag 5 is SUB; register/register form.
+}
+
+void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Address &address) {
+  arith_int<5>(Ty, reg, address); // Tag 5 is SUB; register/memory form.
+}
+
+void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Immediate &imm) {
+  arith_int<5>(Ty, reg, imm); // Tag 5 is SUB; register/immediate form.
+}
+
+void AssemblerX8632::sub(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<5>(Ty, address, reg); // Tag 5 is SUB; memory/register form.
+}
+
+void AssemblerX8632::sub(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<5>(Ty, address, imm); // Tag 5 is SUB; memory/immediate form.
+}
+
+void AssemblerX8632::sbb(Type Ty, GPRRegister dst, GPRRegister src) {
+  arith_int<3>(Ty, dst, src); // Tag 3 is SBB; register/register form.
+}
+
+void AssemblerX8632::sbb(Type Ty, GPRRegister dst, const Address &address) {
+  arith_int<3>(Ty, dst, address); // Tag 3 is SBB; register/memory form.
+}
+
+void AssemblerX8632::sbb(Type Ty, GPRRegister reg, const Immediate &imm) {
+  arith_int<3>(Ty, reg, imm); // Tag 3 is SBB; register/immediate form.
+}
+
+void AssemblerX8632::sbb(Type Ty, const Address &address, GPRRegister reg) {
+  arith_int<3>(Ty, address, reg); // Tag 3 is SBB; memory/register form.
+}
+
+void AssemblerX8632::sbb(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  arith_int<3>(Ty, address, imm); // Tag 3 is SBB; memory/immediate form.
+}
+
+void AssemblerX8632::cbw() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride(); // CBW is the 16-bit form of opcode 0x98.
+  emitUint8(0x98); // Sign-extend AL into AX.
+}
+
+void AssemblerX8632::cwd() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride(); // CWD is the 16-bit form of opcode 0x99.
+  emitUint8(0x99); // Sign-extend AX into DX:AX.
+}
+
+void AssemblerX8632::cdq() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x99); // 99 without a size prefix: CDQ (EAX -> EDX:EAX).
+}
+
+void AssemblerX8632::div(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Emitted before the opcode for 16-bit types.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // F6 /6: DIV r/m8.
+  else
+    emitUint8(0xF7); // F7 /6: DIV r/m16/32.
+  emitRegisterOperand(6, reg); // /6 opcode extension selects unsigned DIV.
+}
+
+void AssemblerX8632::div(Type Ty, const Address &addr) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6);
+  else
+    emitUint8(0xF7);
+  emitOperand(6, addr);
+}
+
+void AssemblerX8632::idiv(Type Ty, GPRRegister reg) { // Emits idiv with a register operand: opcode 0xF6/0xF7 with extension 7.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitRegisterOperand(7, reg); // 7 is the opcode-extension field selecting IDIV.
+}
+
+void AssemblerX8632::idiv(Type Ty, const Address &addr) { // Emits idiv with a memory operand; same opcode selection as the register form.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitOperand(7, addr); // emitOperand places 7 in bits 3..5 of the ModRM byte.
+}
+
+void AssemblerX8632::imul(Type Ty, GPRRegister dst, GPRRegister src) { // Two-operand imul, register/register: bytes 0x0F 0xAF + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted by this form.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAF);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Address &address) { // Two-operand imul, register/memory: bytes 0x0F 0xAF + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted by this form.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAF);
+  emitOperand(reg, address);
+}
+
+void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Immediate &imm) { // imul by immediate; reg is used for both operand fields.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted by this form.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (imm.is_int8()) { // Compact encoding when the immediate fits in 8 bits.
+    emitUint8(0x6B);
+    emitRegisterOperand(reg, reg);
+    emitUint8(imm.value() & 0xFF); // Single immediate byte.
+  } else {
+    emitUint8(0x69);
+    emitRegisterOperand(reg, reg);
+    emitImmediate(Ty, imm); // 16- or 32-bit immediate depending on Ty (see emitImmediate).
+  }
+}
+
+void AssemblerX8632::imul(Type Ty, GPRRegister reg) { // One-operand imul, register: opcode 0xF6/0xF7 with extension 5.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitRegisterOperand(5, reg); // 5 is the opcode-extension field selecting IMUL.
+}
+
+void AssemblerX8632::imul(Type Ty, const Address &address) { // One-operand imul, memory: opcode 0xF6/0xF7 with extension 5.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitOperand(5, address); // emitOperand places 5 in bits 3..5 of the ModRM byte.
+}
+
+void AssemblerX8632::mul(Type Ty, GPRRegister reg) { // Emits mul with a register operand: opcode 0xF6/0xF7 with extension 4.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitRegisterOperand(4, reg); // 4 is the opcode-extension field selecting MUL.
+}
+
+void AssemblerX8632::mul(Type Ty, const Address &address) { // Emits mul with a memory operand; same opcode selection as the register form.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitOperand(4, address); // emitOperand places 4 in bits 3..5 of the ModRM byte.
+}
+
+void AssemblerX8632::incl(GPRRegister reg) { // Increment of a register using the single-byte encoding.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x40 + reg); // The register number is folded into the opcode byte (0x40..0x47).
+}
+
+void AssemblerX8632::incl(const Address &address) { // Increment of a memory operand: opcode 0xFF with extension 0.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);
+  emitOperand(0, address); // Extension 0 selects INC (decl() uses 1 with the same opcode).
+}
+
+void AssemblerX8632::decl(GPRRegister reg) { // Decrement of a register using the single-byte encoding.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x48 + reg); // The register number is folded into the opcode byte (0x48..0x4F).
+}
+
+void AssemblerX8632::decl(const Address &address) { // Decrement of a memory operand: opcode 0xFF with extension 1.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);
+  emitOperand(1, address); // Extension 1 selects DEC (incl() uses 0 with the same opcode).
+}
+
+void AssemblerX8632::rol(Type Ty, GPRRegister reg, const Immediate &imm) { // rol by immediate; delegates to emitGenericShift.
+  emitGenericShift(0, Ty, reg, imm); // 0 is the opcode-extension value used for ROL by all three overloads.
+}
+
+void AssemblerX8632::rol(Type Ty, GPRRegister operand, GPRRegister shifter) { // rol of a register by a count register.
+  emitGenericShift(0, Ty, Operand(operand), shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::rol(Type Ty, const Address &operand, GPRRegister shifter) { // rol of a memory operand by a count register.
+  emitGenericShift(0, Ty, operand, shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::shl(Type Ty, GPRRegister reg, const Immediate &imm) { // shl by immediate; delegates to emitGenericShift.
+  emitGenericShift(4, Ty, reg, imm); // 4 is the opcode-extension value used for SHL by all three overloads.
+}
+
+void AssemblerX8632::shl(Type Ty, GPRRegister operand, GPRRegister shifter) { // shl of a register by a count register.
+  emitGenericShift(4, Ty, Operand(operand), shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::shl(Type Ty, const Address &operand, GPRRegister shifter) { // shl of a memory operand by a count register.
+  emitGenericShift(4, Ty, operand, shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::shr(Type Ty, GPRRegister reg, const Immediate &imm) { // shr by immediate; delegates to emitGenericShift.
+  emitGenericShift(5, Ty, reg, imm); // 5 is the opcode-extension value used for SHR by all three overloads.
+}
+
+void AssemblerX8632::shr(Type Ty, GPRRegister operand, GPRRegister shifter) { // shr of a register by a count register.
+  emitGenericShift(5, Ty, Operand(operand), shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::shr(Type Ty, const Address &operand, GPRRegister shifter) { // shr of a memory operand by a count register.
+  emitGenericShift(5, Ty, operand, shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::sar(Type Ty, GPRRegister reg, const Immediate &imm) { // sar by immediate; delegates to emitGenericShift.
+  emitGenericShift(7, Ty, reg, imm); // 7 is the opcode-extension value used for SAR by all three overloads.
+}
+
+void AssemblerX8632::sar(Type Ty, GPRRegister operand, GPRRegister shifter) { // sar of a register by a count register.
+  emitGenericShift(7, Ty, Operand(operand), shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::sar(Type Ty, const Address &address, GPRRegister shifter) { // sar of a memory operand by a count register.
+  emitGenericShift(7, Ty, address, shifter); // emitGenericShift asserts that shifter is ecx.
+}
+
+void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src) { // shld without an immediate: bytes 0x0F 0xA5 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA5);
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+}
+
+void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) { // shld with an 8-bit immediate count: bytes 0x0F 0xA4 + operand + imm8.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  assert(imm.is_int8()); // The count is emitted as a single byte below.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA4);
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+  emitUint8(imm.value() & 0xFF);
+}
+
+void AssemblerX8632::shld(Type Ty, const Address &operand, GPRRegister src) { // shld on a memory operand without an immediate: 0x0F 0xA5.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA5);
+  emitOperand(src, operand); // src goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src) { // shrd without an immediate: bytes 0x0F 0xAD + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAD);
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+}
+
+void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) { // shrd with an 8-bit immediate count: bytes 0x0F 0xAC + operand + imm8.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  assert(imm.is_int8()); // The count is emitted as a single byte below.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAC);
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+  emitUint8(imm.value() & 0xFF);
+}
+
+void AssemblerX8632::shrd(Type Ty, const Address &dst, GPRRegister src) { // shrd on a memory operand without an immediate: 0x0F 0xAD.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAD);
+  emitOperand(src, dst); // src goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::neg(Type Ty, GPRRegister reg) { // Emits neg with a register operand: opcode 0xF6/0xF7 with extension 3.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitRegisterOperand(3, reg); // 3 is the opcode-extension field selecting NEG.
+}
+
+void AssemblerX8632::neg(Type Ty, const Address &addr) { // Emits neg with a memory operand; same opcode selection as the register form.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6); // Byte-sized opcode.
+  else
+    emitUint8(0xF7); // Opcode used for all non-byte types.
+  emitOperand(3, addr); // emitOperand places 3 in bits 3..5 of the ModRM byte.
+}
+
+void AssemblerX8632::notl(GPRRegister reg) { // Bitwise NOT of a register; takes no Type and always uses opcode 0xF7.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF7);
+  emitUint8(0xD0 | reg); // Hand-built ModRM byte: 0xD0 = register-direct mode with extension 2, low bits = reg.
+}
+
+void AssemblerX8632::bswap(Type Ty, GPRRegister reg) { // Emits bswap: bytes 0x0F, 0xC8 | reg.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i32); // Only the 32-bit form is supported.
+  (void)Ty; // Ty is otherwise unused; the cast keeps it referenced when assert() compiles away.
+  emitUint8(0x0F);
+  emitUint8(0xC8 | reg); // The register number is folded into the second opcode byte.
+}
+
+void AssemblerX8632::bsf(Type Ty, GPRRegister dst, GPRRegister src) { // Emits bsf, register/register: bytes 0x0F 0xBC + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBC);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::bsf(Type Ty, GPRRegister dst, const Address &src) { // Emits bsf, register/memory: bytes 0x0F 0xBC + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBC);
+  emitOperand(dst, src); // dst goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::bsr(Type Ty, GPRRegister dst, GPRRegister src) { // Emits bsr, register/register: bytes 0x0F 0xBD + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBD);
+  emitRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::bsr(Type Ty, GPRRegister dst, const Address &src) { // Emits bsr, register/memory: bytes 0x0F 0xBD + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Only 16- and 32-bit types are accepted.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBD);
+  emitOperand(dst, src); // dst goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::bt(GPRRegister base, GPRRegister offset) { // Emits bt, register/register: bytes 0x0F 0xA3 + operand.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xA3);
+  emitRegisterOperand(offset, base); // Note the order: offset is passed first, base second.
+}
+
+void AssemblerX8632::ret() { // Plain return: single opcode byte 0xC3.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xC3);
+}
+
+void AssemblerX8632::ret(const Immediate &imm) { // Return with a 16-bit immediate: opcode 0xC2 followed by two bytes.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xC2);
+  assert(imm.is_uint16()); // Only the low 16 bits are emitted below.
+  emitUint8(imm.value() & 0xFF); // Low byte first (little-endian).
+  emitUint8((imm.value() >> 8) & 0xFF); // Then the high byte.
+}
+
+void AssemblerX8632::nop(int size) { // Emits one no-op instruction that is exactly `size` bytes long (1..MAX_NOP_SIZE).
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // There are nops up to size 15, but for now just provide up to size 8.
+  assert(0 < size && size <= MAX_NOP_SIZE);
+  switch (size) { // Each case emits exactly `size` bytes.
+  case 1:
+    emitUint8(0x90); // Plain one-byte nop.
+    break;
+  case 2:
+    emitUint8(0x66); // Prefix byte in front of the one-byte nop.
+    emitUint8(0x90);
+    break;
+  case 3:
+    emitUint8(0x0F); // 0x0F 0x1F opcode with a one-byte operand.
+    emitUint8(0x1F);
+    emitUint8(0x00);
+    break;
+  case 4:
+    emitUint8(0x0F); // 0x0F 0x1F opcode with a two-byte operand.
+    emitUint8(0x1F);
+    emitUint8(0x40);
+    emitUint8(0x00);
+    break;
+  case 5:
+    emitUint8(0x0F); // 0x0F 0x1F opcode with a three-byte operand.
+    emitUint8(0x1F);
+    emitUint8(0x44);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 6:
+    emitUint8(0x66); // Same bytes as case 5 with a 0x66 prefix in front.
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x44);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 7:
+    emitUint8(0x0F); // 0x0F 0x1F opcode with a five-byte operand.
+    emitUint8(0x1F);
+    emitUint8(0x80);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 8:
+    emitUint8(0x0F); // 0x0F 0x1F opcode with a six-byte operand.
+    emitUint8(0x1F);
+    emitUint8(0x84);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  default:
+    llvm_unreachable("Unimplemented"); // Sizes outside 1..8 have no encoding here.
+  }
+}
+
+void AssemblerX8632::int3() { // Emits int3: single opcode byte 0xCC.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xCC);
+}
+
+void AssemblerX8632::hlt() { // Emits hlt: single opcode byte 0xF4.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF4);
+}
+
+void AssemblerX8632::ud2() { // Emits ud2: two opcode bytes 0x0F 0x0B.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x0B);
+}
+
+void AssemblerX8632::j(CondX86::BrCond condition, Label *label, bool near) { // Conditional branch to a Label, bound or not yet bound.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (label->IsBound()) { // Target already known, so the displacement can be computed now.
+    static const int kShortSize = 2; // Bytes emitted by the short form below (opcode + 8-bit displacement).
+    static const int kLongSize = 6; // Bytes emitted by the long form below (two opcode bytes + 32-bit displacement).
+    intptr_t offset = label->Position() - Buffer.size(); // Distance from the start of this instruction to the target.
+    assert(offset <= 0); // A bound label is never ahead of the current position.
+    if (Utils::IsInt(8, offset - kShortSize)) { // Displacement is measured from the end of the instruction, hence the size adjustment.
+      // TODO(stichnot): Here and in jmp(), we may need to be more
+      // conservative about the backward branch distance if the branch
+      // instruction is within a bundle_lock sequence, because the
+      // distance may increase when padding is added.  This isn't an
+      // issue for branches outside a bundle_lock, because if padding
+      // is added, the retry may change it to a long backward branch
+      // without affecting any of the bookkeeping.
+      emitUint8(0x70 + condition);
+      emitUint8((offset - kShortSize) & 0xFF);
+    } else {
+      emitUint8(0x0F);
+      emitUint8(0x80 + condition);
+      emitInt32(offset - kLongSize);
+    }
+  } else if (near) { // Unbound label, caller requested the short form.
+    emitUint8(0x70 + condition);
+    emitNearLabelLink(label); // One-byte placeholder, patched later by bind().
+  } else { // Unbound label, long form.
+    emitUint8(0x0F);
+    emitUint8(0x80 + condition);
+    emitLabelLink(label); // Four-byte link slot, patched later by bind().
+  }
+}
+
+void AssemblerX8632::j(CondX86::BrCond condition,
+                       const ConstantRelocatable *label) { // Conditional branch to a relocatable symbol; always uses the long form.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x80 + condition);
+  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label)); // Records a PC-relative relocation for the following 4 bytes.
+  emitInt32(-4); // Placeholder value stored in the relocated field.
+}
+
+void AssemblerX8632::jmp(GPRRegister reg) { // Indirect jump through a register: opcode 0xFF with extension 4.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);
+  emitRegisterOperand(4, reg);
+}
+
+void AssemblerX8632::jmp(Label *label, bool near) { // Unconditional jump to a Label, bound or not yet bound.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (label->IsBound()) { // Target already known, so the displacement can be computed now.
+    static const int kShortSize = 2; // Bytes emitted by the short form below (opcode + 8-bit displacement).
+    static const int kLongSize = 5; // Bytes emitted by the long form below (opcode + 32-bit displacement).
+    intptr_t offset = label->Position() - Buffer.size(); // Distance from the start of this instruction to the target.
+    assert(offset <= 0); // A bound label is never ahead of the current position.
+    if (Utils::IsInt(8, offset - kShortSize)) { // Displacement is measured from the end of the instruction.
+      emitUint8(0xEB);
+      emitUint8((offset - kShortSize) & 0xFF);
+    } else {
+      emitUint8(0xE9);
+      emitInt32(offset - kLongSize);
+    }
+  } else if (near) { // Unbound label, caller requested the short form.
+    emitUint8(0xEB);
+    emitNearLabelLink(label); // One-byte placeholder, patched later by bind().
+  } else { // Unbound label, long form.
+    emitUint8(0xE9);
+    emitLabelLink(label); // Four-byte link slot, patched later by bind().
+  }
+}
+
+void AssemblerX8632::jmp(const ConstantRelocatable *label) { // Unconditional jump to a relocatable symbol; always uses the long form.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xE9);
+  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label)); // Records a PC-relative relocation for the following 4 bytes.
+  emitInt32(-4); // Placeholder value stored in the relocated field.
+}
+
+void AssemblerX8632::mfence() { // Emits mfence: three bytes 0x0F 0xAE 0xF0.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0xAE);
+  emitUint8(0xF0);
+}
+
+void AssemblerX8632::lock() { // Emits a standalone lock prefix byte (0xF0); the caller emits the instruction it applies to.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF0);
+}
+
+void AssemblerX8632::cmpxchg(Type Ty, const Address &address, GPRRegister reg,
+                             bool Locked) { // Emits cmpxchg on a memory operand, optionally with a lock prefix.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Prefixes come first: operand-size override, then the optional lock byte.
+  if (Locked)
+    emitUint8(0xF0); // Lock prefix.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xB0); // Byte-sized opcode.
+  else
+    emitUint8(0xB1); // Opcode used for all non-byte types.
+  emitOperand(reg, address); // reg goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::cmpxchg8b(const Address &address, bool Locked) { // Emits cmpxchg8b: optional lock prefix, then 0x0F 0xC7 with extension 1.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Locked)
+    emitUint8(0xF0); // Lock prefix.
+  emitUint8(0x0F);
+  emitUint8(0xC7);
+  emitOperand(1, address); // 1 is an opcode extension here, not a register number.
+}
+
+void AssemblerX8632::xadd(Type Ty, const Address &addr, GPRRegister reg,
+                          bool Locked) { // Emits xadd on a memory operand, optionally with a lock prefix.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // Prefixes come first: operand-size override, then the optional lock byte.
+  if (Locked)
+    emitUint8(0xF0); // Lock prefix.
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xC0); // Byte-sized opcode.
+  else
+    emitUint8(0xC1); // Opcode used for all non-byte types.
+  emitOperand(reg, addr); // reg goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::xchg(Type Ty, const Address &addr, GPRRegister reg) { // Emits xchg between a memory operand and a register.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operands need the operand-size override prefix.
+  if (isByteSizedArithType(Ty))
+    emitUint8(0x86); // Byte-sized opcode.
+  else
+    emitUint8(0x87); // Opcode used for all non-byte types.
+  emitOperand(reg, addr); // reg goes into bits 3..5 of the ModRM byte (see emitOperand).
+}
+
+void AssemblerX8632::emitSegmentOverride(uint8_t prefix) { // Emits the caller-supplied prefix byte unchanged; no validation is done here.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(prefix);
+}
+
+void AssemblerX8632::align(intptr_t alignment, intptr_t offset) { // Pads with nops until (offset + buffer position) is a multiple of alignment.
+  assert(llvm::isPowerOf2_32(alignment)); // The mask arithmetic below requires a power-of-two alignment.
+  intptr_t pos = offset + Buffer.getPosition();
+  intptr_t mod = pos & (alignment - 1); // Remainder of pos modulo alignment.
+  if (mod == 0) { // Already aligned; emit nothing.
+    return;
+  }
+  intptr_t bytes_needed = alignment - mod;
+  while (bytes_needed > MAX_NOP_SIZE) { // Use maximum-size nops while more than one nop's worth of padding remains.
+    nop(MAX_NOP_SIZE);
+    bytes_needed -= MAX_NOP_SIZE;
+  }
+  if (bytes_needed) { // Finish with one nop of exactly the remaining size.
+    nop(bytes_needed);
+  }
+  assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0); // Post-condition: the position is now aligned.
+}
+
+void AssemblerX8632::bind(Label *label) { // Binds label to the current buffer position and patches every pending reference to it.
+  intptr_t bound = Buffer.size(); // The address the label resolves to.
+  assert(!label->IsBound()); // Labels can only be bound once.
+  while (label->IsLinked()) { // Walk the chain of 32-bit link slots threaded through the buffer.
+    intptr_t position = label->LinkPosition();
+    intptr_t next = Buffer.load<int32_t>(position); // The slot currently holds the previous link (written by emitLabelLink).
+    Buffer.store<int32_t>(position, bound - (position + 4)); // Overwrite with the displacement from the end of the 4-byte slot.
+    label->position_ = next; // Advance to the next pending link.
+  }
+  while (label->HasNear()) { // Patch the pending 8-bit references.
+    intptr_t position = label->NearPosition(); // NOTE(review): presumably removes the entry, otherwise this loop would not terminate -- confirm in Label.
+    intptr_t offset = bound - (position + 1); // Displacement from the end of the 1-byte slot.
+    assert(Utils::IsInt(8, offset)); // A near reference must be reachable with 8 bits.
+    Buffer.store<int8_t>(position, offset);
+  }
+  label->BindTo(bound);
+}
+
+void AssemblerX8632::emitOperand(int rm, const Operand &operand) { // Emits a pre-encoded operand, merging rm into bits 3..5 of its first byte.
+  assert(rm >= 0 && rm < 8); // rm must fit in a 3-bit field.
+  const intptr_t length = operand.length_;
+  assert(length > 0);
+  // Emit the ModRM byte updated with the given RM value.
+  assert((operand.encoding_[0] & 0x38) == 0); // Bits 3..5 must be clear so the addition below cannot carry.
+  emitUint8(operand.encoding_[0] + (rm << 3));
+  if (operand.fixup()) { // The fixup, if any, is recorded after the first byte and before the remaining bytes.
+    emitFixup(operand.fixup());
+  }
+  // Emit the rest of the encoded operand.
+  for (intptr_t i = 1; i < length; i++) {
+    emitUint8(operand.encoding_[i]);
+  }
+}
+
+void AssemblerX8632::emitImmediate(Type Ty, const Immediate &imm) { // Emits imm as 16 bits for i16, otherwise as 32 bits.
+  if (Ty == IceType_i16) {
+    assert(!imm.fixup()); // Fixups are not supported on 16-bit immediates.
+    emitInt16(imm.value());
+  } else {
+    if (imm.fixup()) { // Record the fixup immediately before the 32-bit field it applies to.
+      emitFixup(imm.fixup());
+    }
+    emitInt32(imm.value());
+  }
+}
+
+void AssemblerX8632::emitComplexI8(int rm, const Operand &operand,
+                                   const Immediate &immediate) { // Emits a byte-sized arithmetic op (selected by rm) with an 8-bit immediate.
+  assert(rm >= 0 && rm < 8); // rm must fit in a 3-bit field.
+  assert(immediate.is_int8()); // Only one immediate byte is emitted in either branch.
+  if (operand.IsRegister(RegX8632::Encoded_Reg_eax)) {
+    // Use short form if the destination is al.
+    emitUint8(0x04 + (rm << 3)); // rm is folded into the opcode byte; no operand bytes are emitted.
+    emitUint8(immediate.value() & 0xFF);
+  } else {
+    // Use sign-extended 8-bit immediate.
+    emitUint8(0x80);
+    emitOperand(rm, operand);
+    emitUint8(immediate.value() & 0xFF);
+  }
+}
+
+void AssemblerX8632::emitComplex(Type Ty, int rm, const Operand &operand,
+                                 const Immediate &immediate) { // Emits an arithmetic op (selected by rm) with an immediate, picking the shortest of three forms.
+  assert(rm >= 0 && rm < 8); // rm must fit in a 3-bit field.
+  if (immediate.is_int8()) { // Checked first, so an 8-bit immediate wins even when the operand is eax.
+    // Use sign-extended 8-bit immediate.
+    emitUint8(0x83);
+    emitOperand(rm, operand);
+    emitUint8(immediate.value() & 0xFF);
+  } else if (operand.IsRegister(RegX8632::Encoded_Reg_eax)) {
+    // Use short form if the destination is eax.
+    emitUint8(0x05 + (rm << 3)); // rm is folded into the opcode byte; no operand bytes are emitted.
+    emitImmediate(Ty, immediate);
+  } else { // General form: opcode 0x81, operand, then a full-width immediate.
+    emitUint8(0x81);
+    emitOperand(rm, operand);
+    emitImmediate(Ty, immediate);
+  }
+}
+
+void AssemblerX8632::emitLabel(Label *label, intptr_t instruction_size) { // Emits a 32-bit reference to label: a displacement if bound, else a link slot.
+  if (label->IsBound()) {
+    intptr_t offset = label->Position() - Buffer.size(); // Distance from the current position to the target.
+    assert(offset <= 0); // A bound label is never ahead of the current position.
+    emitInt32(offset - instruction_size); // The caller-supplied instruction_size is subtracted from the displacement.
+  } else {
+    emitLabelLink(label); // Target unknown; bind() patches this slot later.
+  }
+}
+
+void AssemblerX8632::emitLabelLink(Label *Label) { // Emits a 4-byte slot that chains this reference into the label's pending-link list.
+  assert(!Label->IsBound()); // Links are only needed for labels whose position is not yet known.
+  intptr_t Position = Buffer.size(); // Where the slot is about to be written.
+  emitInt32(Label->position_); // Store the previous link value in the slot; bind() reads it back to walk the chain.
+  if (!getPreliminary()) // The label is left untouched when getPreliminary() is true.
+    Label->LinkTo(Position);
+}
+
+void AssemblerX8632::emitNearLabelLink(Label *label) { // Emits a 1-byte placeholder for an 8-bit reference to an unbound label.
+  assert(!label->IsBound()); // Links are only needed for labels whose position is not yet known.
+  intptr_t position = Buffer.size(); // Where the placeholder is about to be written.
+  emitUint8(0); // Placeholder; bind() overwrites it with the real displacement.
+  if (!getPreliminary()) // The label is left untouched when getPreliminary() is true.
+    label->NearLinkTo(position);
+}
+
+void AssemblerX8632::emitGenericShift(int rm, Type Ty, GPRRegister reg,
+                                      const Immediate &imm) { // Shift/rotate of a register by an immediate; rm selects the operation.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // The count is emitted as a single byte.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (imm.value() == 1) { // A count of 1 uses the opcode that carries no immediate byte.
+    emitUint8(isByteSizedArithType(Ty) ? 0xD0 : 0xD1);
+    emitOperand(rm, Operand(reg));
+  } else { // Any other count: opcode 0xC0/0xC1 followed by the count byte.
+    emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
+    emitOperand(rm, Operand(reg));
+    emitUint8(imm.value() & 0xFF);
+  }
+}
+
+void AssemblerX8632::emitGenericShift(int rm, Type Ty, const Operand &operand,
+                                      GPRRegister shifter) { // Shift/rotate of an operand by a count register; rm selects the operation.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(shifter == RegX8632::Encoded_Reg_ecx); // The count register must be ecx; it is not encoded in the emitted bytes.
+  (void)shifter; // shifter is otherwise unused; the cast keeps it referenced when assert() compiles away.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(isByteSizedArithType(Ty) ? 0xD2 : 0xD3); // 0xD2 for byte-sized types, 0xD3 otherwise.
+  emitOperand(rm, operand);
+}
+
+} // end of namespace X8632
+} // end of namespace Ice
diff --git a/src/IceAssemblerX8632.h b/src/IceAssemblerX8632.h
index ad79489..6c22a24 100644
--- a/src/IceAssemblerX8632.h
+++ b/src/IceAssemblerX8632.h
@@ -14,9 +14,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for x86-32.
-//
+///
+/// \file
+/// This file implements the Assembler class for x86-32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8632_H
diff --git a/src/IceAssemblerX8664.cpp b/src/IceAssemblerX8664.cpp
index e56ada6..910924d 100644
--- a/src/IceAssemblerX8664.cpp
+++ b/src/IceAssemblerX8664.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for x86-64.
-//
+///
+/// \file
+/// This file implements the Assembler class for x86-64.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceAssemblerX8664.h"
diff --git a/src/IceAssemblerX8664.h b/src/IceAssemblerX8664.h
index 0deda30..f2ffd7f 100644
--- a/src/IceAssemblerX8664.h
+++ b/src/IceAssemblerX8664.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for x86-64.
-//
+///
+/// \file
+/// This file implements the Assembler class for x86-64.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8664_H
diff --git a/src/IceBrowserCompileServer.cpp b/src/IceBrowserCompileServer.cpp
index 5a5b970..03127a9 100644
--- a/src/IceBrowserCompileServer.cpp
+++ b/src/IceBrowserCompileServer.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the browser-based compile server.
-//
+///
+/// \file
+/// This file defines the browser-based compile server.
+///
 //===----------------------------------------------------------------------===//
 
 // Can only compile this with the NaCl compiler (needs irt.h, and the
diff --git a/src/IceBrowserCompileServer.h b/src/IceBrowserCompileServer.h
index f15ddaf..e76b820 100644
--- a/src/IceBrowserCompileServer.h
+++ b/src/IceBrowserCompileServer.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the browser-specific compile server.
-//
+///
+/// \file
+/// This file declares the browser-specific compile server.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEBROWSERCOMPILESERVER_H
@@ -30,12 +31,12 @@
 
 namespace Ice {
 
-// The browser variant of the compile server.
-// Compared to the commandline version, this version gets compile
-// requests over IPC. Each compile request will have a slimmed down
-// version of argc, argv while other flags are set to defaults that
-// make sense in the browser case. The output file is specified via
-// a posix FD, and input bytes are pushed to the server.
+/// The browser variant of the compile server.
+/// Compared to the commandline version, this version gets compile
+/// requests over IPC. Each compile request will have a slimmed down
+/// version of argc, argv while other flags are set to defaults that
+/// make sense in the browser case. The output file is specified via
+/// a posix FD, and input bytes are pushed to the server.
 class BrowserCompileServer : public CompileServer {
   BrowserCompileServer() = delete;
   BrowserCompileServer(const BrowserCompileServer &) = delete;
@@ -52,21 +53,21 @@
 
   ErrorCode &getErrorCode() final;
 
-  // Parse and set up the flags for compile jobs.
+  /// Parse and set up the flags for compile jobs.
   void getParsedFlags(uint32_t NumThreads, int argc, char **argv);
 
-  // Creates the streams + context and starts the compile thread,
-  // handing off the streams + context.
+  /// Creates the streams + context and starts the compile thread,
+  /// handing off the streams + context.
   void startCompileThread(int OutFD);
 
-  // Call to push more bytes to the current input stream.
-  // Returns false on success and true on error.
+  /// Call to push more bytes to the current input stream.
+  /// Returns false on success and true on error.
   bool pushInputBytes(const void *Data, size_t NumBytes);
 
-  // Notify the input stream of EOF.
+  /// Notify the input stream of EOF.
   void endInputStream();
 
-  // Wait for the compile thread to complete then reset the state.
+  /// Wait for the compile thread to complete then reset the state.
   void waitForCompileThread() {
     CompileThread.join();
     if (Ctx->getErrorStatus()->value())
@@ -94,12 +95,12 @@
     std::string Buffer;
     llvm::raw_string_ostream StrBuf;
   };
-  // This currently only handles a single compile request, hence one copy
-  // of the state.
+  /// This currently only handles a single compile request, hence one copy
+  /// of the state.
   std::unique_ptr<GlobalContext> Ctx;
-  // A borrowed reference to the current InputStream. The compiler owns
-  // the actual reference so the server must be careful not to access
-  // after the compiler is done.
+  /// A borrowed reference to the current InputStream. The compiler owns
+  /// the actual reference so the server must be careful not to access
+  /// after the compiler is done.
   llvm::QueueStreamer *InputStream = nullptr;
   std::unique_ptr<Ostream> LogStream;
   std::unique_ptr<llvm::raw_fd_ostream> EmitStream;
diff --git a/src/IceBuildDefs.h b/src/IceBuildDefs.h
index 6af41f1..42e66b8 100644
--- a/src/IceBuildDefs.h
+++ b/src/IceBuildDefs.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines constexpr functions to query various #define values.
-//
+///
+/// \file
+/// This file defines constexpr functions to query various #define values.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEBUILDDEFS_H
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index d3b0973..f7b1a8c 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Cfg class, including constant pool
-// management.
-//
+///
+/// \file
+/// This file implements the Cfg class, including constant pool
+/// management.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceCfg.h"
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 800bf40..08b4e97 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the Cfg class, which represents the control flow
-// graph and the overall per-function compilation context.
-//
+///
+/// \file
+/// This file declares the Cfg class, which represents the control flow
+/// graph and the overall per-function compilation context.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECFG_H
@@ -35,12 +36,12 @@
                                      uint32_t SequenceNumber) {
     return std::unique_ptr<Cfg>(new Cfg(Ctx, SequenceNumber));
   }
-  // Gets a pointer to the current thread's Cfg.
+  /// Gets a pointer to the current thread's Cfg.
   static const Cfg *getCurrentCfg() { return ICE_TLS_GET_FIELD(CurrentCfg); }
   static void setCurrentCfg(const Cfg *Func) {
     ICE_TLS_SET_FIELD(CurrentCfg, Func);
   }
-  // Gets a pointer to the current thread's Cfg's allocator.
+  /// Gets a pointer to the current thread's Cfg's allocator.
   static ArenaAllocator<> *getCurrentCfgAllocator() {
     assert(ICE_TLS_GET_FIELD(CurrentCfg));
     return ICE_TLS_GET_FIELD(CurrentCfg)->Allocator.get();
@@ -49,44 +50,54 @@
   GlobalContext *getContext() const { return Ctx; }
   uint32_t getSequenceNumber() const { return SequenceNumber; }
 
-  // Returns true if any of the specified options in the verbose mask
-  // are set.  If the argument is omitted, it checks if any verbose
-  // options at all are set.
+  /// Returns true if any of the specified options in the verbose mask
+  /// are set.  If the argument is omitted, it checks if any verbose
+  /// options at all are set.
   bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
   void setVerbose(VerboseMask Mask) { VMask = Mask; }
 
-  // Manage the name and return type of the function being translated.
+  /// \name Manage the name and return type of the function being translated.
+  /// @{
   void setFunctionName(const IceString &Name) { FunctionName = Name; }
   IceString getFunctionName() const { return FunctionName; }
   void setReturnType(Type Ty) { ReturnType = Ty; }
+  /// @}
 
-  // Manage the "internal" attribute of the function.
+  /// \name Manage the "internal" attribute of the function.
+  /// @{
   void setInternal(bool Internal) { IsInternalLinkage = Internal; }
   bool getInternal() const { return IsInternalLinkage; }
+  /// @}
 
-  // Translation error flagging.  If support for some construct is
-  // known to be missing, instead of an assertion failure, setError()
-  // should be called and the error should be propagated back up.
-  // This way, we can gracefully fail to translate and let a fallback
-  // translator handle the function.
+  /// \name Manage errors.
+  /// @{
+
+  /// Translation error flagging.  If support for some construct is
+  /// known to be missing, instead of an assertion failure, setError()
+  /// should be called and the error should be propagated back up.
+  /// This way, we can gracefully fail to translate and let a fallback
+  /// translator handle the function.
   void setError(const IceString &Message);
   bool hasError() const { return HasError; }
   IceString getError() const { return ErrorMessage; }
+  /// @}
 
-  // Manage nodes (a.k.a. basic blocks, CfgNodes).
+  /// \name Manage nodes (a.k.a. basic blocks, CfgNodes).
+  /// @{
   void setEntryNode(CfgNode *EntryNode) { Entry = EntryNode; }
   CfgNode *getEntryNode() const { return Entry; }
-  // Create a node and append it to the end of the linearized list.
+  /// Create a node and append it to the end of the linearized list.
   CfgNode *makeNode();
   SizeT getNumNodes() const { return Nodes.size(); }
   const NodeList &getNodes() const { return Nodes; }
+  /// @}
 
   typedef int32_t IdentifierIndexType;
-  // Adds a name to the list and returns its index, suitable for the
-  // argument to getIdentifierName().  No checking for duplicates is
-  // done.  This is generally used for node names and variable names
-  // to avoid embedding a std::string inside an arena-allocated
-  // object.
+  /// Adds a name to the list and returns its index, suitable for the
+  /// argument to getIdentifierName().  No checking for duplicates is
+  /// done.  This is generally used for node names and variable names
+  /// to avoid embedding a std::string inside an arena-allocated
+  /// object.
   IdentifierIndexType addIdentifierName(const IceString &Name) {
     IdentifierIndexType Index = IdentifierNames.size();
     IdentifierNames.push_back(Name);
@@ -97,13 +108,17 @@
   }
   enum { IdentifierIndexInvalid = -1 };
 
-  // Manage instruction numbering.
+  /// \name Manage instruction numbering.
+  /// @{
   InstNumberT newInstNumber() { return NextInstNumber++; }
   InstNumberT getNextInstNumber() const { return NextInstNumber; }
+  /// @}
 
-  // Manage Variables.
-  // Create a new Variable with a particular type and an optional
-  // name.  The Node argument is the node where the variable is defined.
+  /// \name Manage Variables.
+  /// @{
+
+  /// Create a new Variable with a particular type and an optional
+  /// name.  The Node argument is the node where the variable is defined.
   // TODO(jpp): untemplate this with two separate methods: makeVariable and
   // makeSpillVariable.
   template <typename T = Variable> T *makeVariable(Type Ty) {
@@ -114,15 +129,19 @@
   }
   SizeT getNumVariables() const { return Variables.size(); }
   const VarList &getVariables() const { return Variables; }
+  /// @}
 
-  // Manage arguments to the function.
+  /// \name Manage arguments to the function.
+  /// @{
   void addArg(Variable *Arg);
   const VarList &getArgs() const { return Args; }
   VarList &getArgs() { return Args; }
   void addImplicitArg(Variable *Arg);
   const VarList &getImplicitArgs() const { return ImplicitArgs; }
+  /// @}
 
-  // Miscellaneous accessors.
+  /// \name Miscellaneous accessors.
+  /// @{
   TargetLowering *getTarget() const { return Target.get(); }
   VariablesMetadata *getVMetadata() const { return VMetadata.get(); }
   Liveness *getLiveness() const { return Live.get(); }
@@ -136,16 +155,17 @@
   bool hasComputedFrame() const;
   bool getFocusedTiming() const { return FocusedTiming; }
   void setFocusedTiming() { FocusedTiming = true; }
+  /// @}
 
-  // Returns true if Var is a global variable that is used by the profiling
-  // code.
+  /// Returns true if Var is a global variable that is used by the profiling
+  /// code.
   static bool isProfileGlobal(const VariableDeclaration &Var);
 
-  // Passes over the CFG.
+  /// Passes over the CFG.
   void translate();
-  // After the CFG is fully constructed, iterate over the nodes and
-  // compute the predecessor and successor edges, in the form of
-  // CfgNode::InEdges[] and CfgNode::OutEdges[].
+  /// After the CFG is fully constructed, iterate over the nodes and
+  /// compute the predecessor and successor edges, in the form of
+  /// CfgNode::InEdges[] and CfgNode::OutEdges[].
   void computeInOutEdges();
   void renumberInstructions();
   void placePhiLoads();
@@ -164,11 +184,14 @@
   void contractEmptyNodes();
   void doBranchOpt();
 
-  // Manage the CurrentNode field, which is used for validating the
-  // Variable::DefNode field during dumping/emitting.
+  /// \name Manage the CurrentNode field.
+  /// CurrentNode is used for validating the Variable::DefNode field during
+  /// dumping/emitting.
+  /// @{
   void setCurrentNode(const CfgNode *Node) { CurrentNode = Node; }
   void resetCurrentNode() { setCurrentNode(nullptr); }
   const CfgNode *getCurrentNode() const { return CurrentNode; }
+  /// @}
 
   void emit();
   void emitIAS();
@@ -176,20 +199,20 @@
                              const Assembler *Asm);
   void dump(const IceString &Message = "");
 
-  // Allocate data of type T using the per-Cfg allocator.
+  /// Allocate data of type T using the per-Cfg allocator.
   template <typename T> T *allocate() { return Allocator->Allocate<T>(); }
 
-  // Allocate an array of data of type T using the per-Cfg allocator.
+  /// Allocate an array of data of type T using the per-Cfg allocator.
   template <typename T> T *allocateArrayOf(size_t NumElems) {
     return Allocator->Allocate<T>(NumElems);
   }
 
-  // Deallocate data that was allocated via allocate<T>().
+  /// Deallocate data that was allocated via allocate<T>().
   template <typename T> void deallocate(T *Object) {
     Allocator->Deallocate(Object);
   }
 
-  // Deallocate data that was allocated via allocateArrayOf<T>().
+  /// Deallocate data that was allocated via allocateArrayOf<T>().
   template <typename T> void deallocateArrayOf(T *Array) {
     Allocator->Deallocate(Array);
   }
@@ -197,17 +220,17 @@
 private:
   Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
 
-  // Adds a call to the ProfileSummary runtime function as the first instruction
-  // in this CFG's entry block.
+  /// Adds a call to the ProfileSummary runtime function as the first
+  /// instruction in this CFG's entry block.
   void addCallToProfileSummary();
 
-  // Iterates over the basic blocks in this CFG, adding profiling code to each
-  // one of them. It returns a list with all the globals that the profiling code
-  // needs to be defined.
+  /// Iterates over the basic blocks in this CFG, adding profiling code to each
+  /// one of them. Returns nothing; the globals the profiling code needs to be
+  /// defined are presumably recorded in the Cfg itself (TODO: confirm).
   void profileBlocks();
 
   GlobalContext *Ctx;
-  uint32_t SequenceNumber; // output order for emission
+  uint32_t SequenceNumber; ///< output order for emission
   VerboseMask VMask;
   IceString FunctionName = "";
   Type ReturnType = IceType_void;
@@ -215,31 +238,31 @@
   bool HasError = false;
   bool FocusedTiming = false;
   IceString ErrorMessage = "";
-  CfgNode *Entry = nullptr; // entry basic block
-  NodeList Nodes;           // linearized node list; Entry should be first
+  CfgNode *Entry = nullptr; ///< entry basic block
+  NodeList Nodes;           ///< linearized node list; Entry should be first
   std::vector<IceString> IdentifierNames;
   InstNumberT NextInstNumber;
   VarList Variables;
-  VarList Args;         // subset of Variables, in argument order
-  VarList ImplicitArgs; // subset of Variables
+  VarList Args;         ///< subset of Variables, in argument order
+  VarList ImplicitArgs; ///< subset of Variables
   std::unique_ptr<ArenaAllocator<>> Allocator;
   std::unique_ptr<Liveness> Live;
   std::unique_ptr<TargetLowering> Target;
   std::unique_ptr<VariablesMetadata> VMetadata;
   std::unique_ptr<Assembler> TargetAssembler;
-  // Globals required by this CFG. Mostly used for the profiler's globals.
+  /// Globals required by this CFG. Mostly used for the profiler's globals.
   std::unique_ptr<VariableDeclarationList> GlobalInits;
 
-  // CurrentNode is maintained during dumping/emitting just for
-  // validating Variable::DefNode.  Normally, a traversal over
-  // CfgNodes maintains this, but before global operations like
-  // register allocation, resetCurrentNode() should be called to avoid
-  // spurious validation failures.
+  /// CurrentNode is maintained during dumping/emitting just for
+  /// validating Variable::DefNode.  Normally, a traversal over
+  /// CfgNodes maintains this, but before global operations like
+  /// register allocation, resetCurrentNode() should be called to avoid
+  /// spurious validation failures.
   const CfgNode *CurrentNode = nullptr;
 
-  // Maintain a pointer in TLS to the current Cfg being translated.
-  // This is primarily for accessing its allocator statelessly, but
-  // other uses are possible.
+  /// Maintain a pointer in TLS to the current Cfg being translated.
+  /// This is primarily for accessing its allocator statelessly, but
+  /// other uses are possible.
   ICE_TLS_DECLARE_FIELD(const Cfg *, CurrentCfg);
 
 public:
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 5ff6936..3f8b39b 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the CfgNode class, including the complexities
-// of instruction insertion and in-edge calculation.
-//
+///
+/// \file
+/// This file implements the CfgNode class, including the complexities
+/// of instruction insertion and in-edge calculation.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceCfgNode.h"
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 3afc446..4f3d7d0 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the CfgNode class, which represents a single
-// basic block as its instruction list, in-edge list, and out-edge
-// list.
-//
+///
+/// \file
+/// This file declares the CfgNode class, which represents a single
+/// basic block as its instruction list, in-edge list, and out-edge
+/// list.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECFGNODE_H
@@ -31,7 +32,7 @@
     return new (Func->allocate<CfgNode>()) CfgNode(Func, LabelIndex);
   }
 
-  // Access the label number and name for this node.
+  /// Access the label number and name for this node.
   SizeT getIndex() const { return Number; }
   void resetIndex(SizeT NewNumber) { Number = NewNumber; }
   IceString getName() const;
@@ -45,34 +46,42 @@
     return ".L" + Func->getFunctionName() + "$" + getName();
   }
 
-  // The HasReturn flag indicates that this node contains a return
-  // instruction and therefore needs an epilog.
+  /// The HasReturn flag indicates that this node contains a return
+  /// instruction and therefore needs an epilog.
   void setHasReturn() { HasReturn = true; }
   bool getHasReturn() const { return HasReturn; }
 
   void setNeedsPlacement(bool Value) { NeedsPlacement = Value; }
   bool needsPlacement() const { return NeedsPlacement; }
 
-  // Access predecessor and successor edge lists.
+  /// \name Access predecessor and successor edge lists.
+  /// @{
   const NodeList &getInEdges() const { return InEdges; }
   const NodeList &getOutEdges() const { return OutEdges; }
+  /// @}
 
-  // Manage the instruction list.
+  /// \name Manage the instruction list.
+  /// @{
   InstList &getInsts() { return Insts; }
   PhiList &getPhis() { return Phis; }
   void appendInst(Inst *Inst);
   void renumberInstructions();
-  // Rough and generally conservative estimate of the number of
-  // instructions in the block.  It is updated when an instruction is
-  // added, but not when deleted.  It is recomputed during
-  // renumberInstructions().
+  /// Rough and generally conservative estimate of the number of
+  /// instructions in the block.  It is updated when an instruction is
+  /// added, but not when deleted.  It is recomputed during
+  /// renumberInstructions().
   InstNumberT getInstCountEstimate() const { return InstCountEstimate; }
+  /// @}
 
-  // Add a predecessor edge to the InEdges list for each of this
-  // node's successors.
+  /// \name Manage predecessors and successors.
+  /// @{
+
+  /// Add a predecessor edge to the InEdges list for each of this
+  /// node's successors.
   void computePredecessors();
   void computeSuccessors();
   CfgNode *splitIncomingEdge(CfgNode *Pred, SizeT InEdgeIndex);
+  /// @}
 
   void placePhiLoads();
   void placePhiStores();
@@ -96,16 +105,16 @@
 private:
   CfgNode(Cfg *Func, SizeT LabelIndex);
   Cfg *const Func;
-  SizeT Number; // label index
+  SizeT Number; ///< label index
   Cfg::IdentifierIndexType NameIndex =
-      Cfg::IdentifierIndexInvalid; // index into Cfg::NodeNames table
-  bool HasReturn = false;          // does this block need an epilog?
+      Cfg::IdentifierIndexInvalid; ///< index into Cfg::NodeNames table
+  bool HasReturn = false;          ///< does this block need an epilog?
   bool NeedsPlacement = false;
-  InstNumberT InstCountEstimate = 0; // rough instruction count estimate
-  NodeList InEdges;                  // in no particular order
-  NodeList OutEdges;                 // in no particular order
-  PhiList Phis;                      // unordered set of phi instructions
-  InstList Insts;                    // ordered list of non-phi instructions
+  InstNumberT InstCountEstimate = 0; ///< rough instruction count estimate
+  NodeList InEdges;                  ///< in no particular order
+  NodeList OutEdges;                 ///< in no particular order
+  PhiList Phis;                      ///< unordered set of phi instructions
+  InstList Insts;                    ///< ordered list of non-phi instructions
 };
 
 } // end of namespace Ice
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index b5893c1..f1aebd2 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines commandline flags parsing.
-// This currently relies on llvm::cl to parse.  In the future, the minimal
-// build can have a simpler parser.
-//
+///
+/// \file
+/// This file defines commandline flags parsing.
+/// This currently relies on llvm::cl to parse.  In the future, the minimal
+/// build can have a simpler parser.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceClFlags.h"
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 2e2cea1..549bf16 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares command line flags controlling translation.
-//
+///
+/// \file
+/// This file declares command line flags controlling translation.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECLFLAGS_H
diff --git a/src/IceClFlagsExtra.h b/src/IceClFlagsExtra.h
index 8d5b118..b105138 100644
--- a/src/IceClFlagsExtra.h
+++ b/src/IceClFlagsExtra.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares command line flags primarily used for non-minimal builds.
-//
+///
+/// \file
+/// This file declares command line flags primarily used for non-minimal builds.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECLFLAGSEXTRA_H
diff --git a/src/IceCompileServer.cpp b/src/IceCompileServer.cpp
index d36cdfd..23874f0 100644
--- a/src/IceCompileServer.cpp
+++ b/src/IceCompileServer.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the basic commandline-based compile server.
-//
+///
+/// \file
+/// This file defines the basic commandline-based compile server.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceCompileServer.h"
diff --git a/src/IceCompileServer.h b/src/IceCompileServer.h
index b5093df..e027cbb 100644
--- a/src/IceCompileServer.h
+++ b/src/IceCompileServer.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the compile server. Given a compiler implementation,
-// it dispatches compile requests to the implementation.
-//
+///
+/// \file
+/// This file declares the compile server. Given a compiler implementation,
+/// it dispatches compile requests to the implementation.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECOMPILESERVER_H
@@ -26,17 +27,17 @@
 
 namespace Ice {
 
-// A CompileServer awaits compile requests, and dispatches the requests
-// to a given Compiler. Each request is paired with an input stream,
-// a context (which has the output stream), and a set of arguments.
-// The CompileServer takes over the current thread to listen to requests,
-// and compile requests are handled on separate threads.
-//
-// Currently, this only handles a single request.
-//
-// When run on the commandline, it receives and therefore dispatches
-// the request immediately.  When run in the browser, it blocks waiting
-// for a request.
+/// A CompileServer awaits compile requests, and dispatches the requests
+/// to a given Compiler. Each request is paired with an input stream,
+/// a context (which has the output stream), and a set of arguments.
+/// The CompileServer takes over the current thread to listen to requests,
+/// and compile requests are handled on separate threads.
+///
+/// Currently, this only handles a single request.
+///
+/// When run on the commandline, it receives and therefore dispatches
+/// the request immediately.  When run in the browser, it blocks waiting
+/// for a request.
 class CompileServer {
   CompileServer() = delete;
   CompileServer(const CompileServer &) = delete;
@@ -59,7 +60,7 @@
   ErrorCode LastError;
 };
 
-// Commandline variant of the compile server.
+/// Commandline variant of the compile server.
 class CLCompileServer : public CompileServer {
   CLCompileServer() = delete;
   CLCompileServer(const CLCompileServer &) = delete;
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index 7764b71..59f18d2 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -6,13 +6,14 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines a driver for translating PNaCl bitcode into native code.
-// It can either directly parse the binary bitcode file, or use LLVM routines to
-// parse a textual bitcode file into LLVM IR and then convert LLVM IR into ICE.
-// In either case, the high-level ICE is then compiled down to native code, as
-// either an ELF object file or a textual asm file.
-//
+///
+/// \file
+/// This file defines a driver for translating PNaCl bitcode into native code.
+/// It can either directly parse the binary bitcode file, or use LLVM routines
+/// to parse a textual bitcode file into LLVM IR and then convert LLVM IR into
+/// ICE. In either case, the high-level ICE is then compiled down to native
+/// code, as either an ELF object file or a textual asm file.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceCompiler.h"
diff --git a/src/IceCompiler.h b/src/IceCompiler.h
index 460c327..e121dbb 100644
--- a/src/IceCompiler.h
+++ b/src/IceCompiler.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the driver for translating bitcode to native code.
-//
+///
+/// \file
+/// This file declares the driver for translating bitcode to native code.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECOMPILER_H
@@ -24,7 +25,7 @@
 
 class ClFlagsExtra;
 
-// A compiler driver. It may be called to handle a single compile request.
+/// A compiler driver. It may be called to handle a single compile request.
 class Compiler {
   Compiler(const Compiler &) = delete;
   Compiler &operator=(const Compiler &) = delete;
@@ -32,8 +33,8 @@
 public:
   Compiler() = default;
 
-  // Run the compiler with the given GlobalContext for compilation
-  // state.  Upon error, the Context's error status will be set.
+  /// Run the compiler with the given GlobalContext for compilation
+  /// state.  Upon error, the Context's error status will be set.
   void run(const ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
            std::unique_ptr<llvm::DataStreamer> &&InputStream);
 };
diff --git a/src/IceConditionCodesARM32.h b/src/IceConditionCodesARM32.h
index b98c770..d897a44 100644
--- a/src/IceConditionCodesARM32.h
+++ b/src/IceConditionCodesARM32.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the condition codes for ARM32.
-//
+///
+/// \file
+/// This file declares the condition codes for ARM32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECONDITIONCODESARM32_H
@@ -25,8 +26,8 @@
   CondARM32 &operator=(const CondARM32 &) = delete;
 
 public:
-  // An enum of codes used for conditional instructions. The enum value
-  // should match the value used to encode operands in binary instructions.
+  /// An enum of codes used for conditional instructions. The enum value
+  /// should match the value used to encode operands in binary instructions.
   enum Cond {
 #define X(tag, encode, opp, emit) tag = encode,
     ICEINSTARM32COND_TABLE
diff --git a/src/IceConditionCodesX8632.h b/src/IceConditionCodesX8632.h
index 77dd4c7..eb09687 100644
--- a/src/IceConditionCodesX8632.h
+++ b/src/IceConditionCodesX8632.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the condition codes for x86-32.
-//
+///
+/// \file
+/// This file declares the condition codes for x86-32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECONDITIONCODESX8632_H
@@ -25,8 +26,8 @@
   CondX86 &operator=(const CondX86 &) = delete;
 
 public:
-  // An enum of condition codes used for branches and cmov. The enum value
-  // should match the value used to encode operands in binary instructions.
+  /// An enum of condition codes used for branches and cmov. The enum value
+  /// should match the value used to encode operands in binary instructions.
   enum BrCond {
 #define X(tag, encode, opp, dump, emit) tag encode,
     ICEINSTX8632BR_TABLE
@@ -34,9 +35,9 @@
         Br_None
   };
 
-  // An enum of condition codes relevant to the CMPPS instruction. The enum
-  // value should match the value used to encode operands in binary
-  // instructions.
+  /// An enum of condition codes relevant to the CMPPS instruction. The enum
+  /// value should match the value used to encode operands in binary
+  /// instructions.
   enum CmppsCond {
 #define X(tag, emit) tag,
     ICEINSTX8632CMPPS_TABLE
diff --git a/src/IceConditionCodesX8664.h b/src/IceConditionCodesX8664.h
index ec30f88..06155ea 100644
--- a/src/IceConditionCodesX8664.h
+++ b/src/IceConditionCodesX8664.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the condition codes for x86-64.
-//
+///
+/// \file
+/// This file declares the condition codes for x86-64.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECONDITIONCODESX8664_H
@@ -20,8 +21,8 @@
 namespace Ice {
 
 class CondX8664 {
-  // An enum of condition codes used for branches and cmov. The enum value
-  // should match the value used to encode operands in binary instructions.
+  /// An enum of condition codes used for branches and cmov. The enum value
+  /// should match the value used to encode operands in binary instructions.
   enum BrCond {
 #define X(tag, encode, opp, dump, emit) tag encode,
     ICEINSTX8664BR_TABLE
@@ -29,9 +30,9 @@
         Br_None
   };
 
-  // An enum of condition codes relevant to the CMPPS instruction. The enum
-  // value should match the value used to encode operands in binary
-  // instructions.
+  /// An enum of condition codes relevant to the CMPPS instruction. The enum
+  /// value should match the value used to encode operands in binary
+  /// instructions.
   enum CmppsCond {
 #define X(tag, emit) tag,
     ICEINSTX8664CMPPS_TABLE
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index d389a02..0fe6f10 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the LLVM to ICE converter.
-//
+///
+/// \file
+/// This file implements the LLVM to ICE converter.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceConverter.h"
diff --git a/src/IceConverter.h b/src/IceConverter.h
index 84389d3..f2dbb3a 100644
--- a/src/IceConverter.h
+++ b/src/IceConverter.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the LLVM to ICE converter.
-//
+///
+/// \file
+/// This file declares the LLVM to ICE converter.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICECONVERTER_H
@@ -41,8 +42,8 @@
 
   llvm::Module *getModule() const { return Mod; }
 
-  // Returns the global declaration associated with the corresponding
-  // global value V. If no such global address, generates fatal error.
+  /// Returns the global declaration associated with the corresponding
+  /// global value V. If no such global address, generates fatal error.
   GlobalDeclaration *getGlobalDeclaration(const llvm::GlobalValue *V);
 
 private:
@@ -51,22 +52,22 @@
       GlobalDeclarationMapType;
   GlobalDeclarationMapType GlobalDeclarationMap;
 
-  // Walks module and generates names for unnamed globals using prefix
-  // getFlags().DefaultGlobalPrefix, if the prefix is non-empty.
+  /// Walks module and generates names for unnamed globals using prefix
+  /// getFlags().DefaultGlobalPrefix, if the prefix is non-empty.
   void nameUnnamedGlobalVariables(llvm::Module *Mod);
 
-  // Walks module and generates names for unnamed functions using
-  // prefix getFlags().DefaultFunctionPrefix, if the prefix is
-  // non-empty.
+  /// Walks module and generates names for unnamed functions using
+  /// prefix getFlags().DefaultFunctionPrefix, if the prefix is
+  /// non-empty.
   void nameUnnamedFunctions(llvm::Module *Mod);
 
-  // Converts functions to ICE, and then machine code.
+  /// Converts functions to ICE, and then machine code.
   void convertFunctions();
 
-  // Converts globals to ICE, and then machine code.
+  /// Converts globals to ICE, and then machine code.
   void convertGlobals(llvm::Module *Mod);
 
-  // Installs global declarations into GlobalDeclarationMap.
+  /// Installs global declarations into GlobalDeclarationMap.
   void installGlobalDeclarations(llvm::Module *Mod);
 };
 
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 2ee41a7..17e2fb1 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares various useful types and classes that have widespread use
-// across Subzero.  Every Subzero source file is expected to include IceDefs.h.
-//
+///
+/// \file
+/// This file declares various useful types and classes that have widespread use
+/// across Subzero.  Every Subzero source file is expected to include IceDefs.h.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEDEFS_H
@@ -147,18 +148,18 @@
 
 typedef std::vector<VariableDeclaration *> VariableDeclarationList;
 
-// SizeT is for holding small-ish limits like number of source
-// operands in an instruction.  It is used instead of size_t (which
-// may be 64-bits wide) when we want to save space.
+/// SizeT is for holding small-ish limits like number of source
+/// operands in an instruction.  It is used instead of size_t (which
+/// may be 64-bits wide) when we want to save space.
 typedef uint32_t SizeT;
 
-// InstNumberT is for holding an instruction number.  Instruction
-// numbers are used for representing Variable live ranges.
+/// InstNumberT is for holding an instruction number.  Instruction
+/// numbers are used for representing Variable live ranges.
 typedef int32_t InstNumberT;
 
-// A LiveBeginEndMapEntry maps a Variable::Number value to an
-// Inst::Number value, giving the instruction number that begins or
-// ends a variable's live range.
+/// A LiveBeginEndMapEntry maps a Variable::Number value to an
+/// Inst::Number value, giving the instruction number that begins or
+/// ends a variable's live range.
 typedef std::pair<SizeT, InstNumberT> LiveBeginEndMapEntry;
 typedef std::vector<LiveBeginEndMapEntry,
                     CfgLocalAllocator<LiveBeginEndMapEntry>> LiveBeginEndMap;
@@ -167,9 +168,9 @@
 typedef uint32_t TimerStackIdT;
 typedef uint32_t TimerIdT;
 
-// Use alignas(MaxCacheLineSize) to isolate variables/fields that
-// might be contended while multithreading.  Assumes the maximum cache
-// line size is 64.
+/// Use alignas(MaxCacheLineSize) to isolate variables/fields that
+/// might be contended while multithreading.  Assumes the maximum cache
+/// line size is 64.
 enum { MaxCacheLineSize = 64 };
 // Use ICE_CACHELINE_BOUNDARY to force the next field in a declaration
 // list to be aligned to the next cache line.
@@ -178,26 +179,26 @@
 #define ICE_CACHELINE_BOUNDARY                                                 \
   __attribute__((aligned(MaxCacheLineSize + 0))) int : 0
 
-// PNaCl is ILP32, so theoretically we should only need 32-bit offsets.
+/// PNaCl is ILP32, so theoretically we should only need 32-bit offsets.
 typedef int32_t RelocOffsetT;
 enum { RelocAddrSize = 4 };
 
 enum LivenessMode {
-  // Basic version of live-range-end calculation.  Marks the last uses
-  // of variables based on dataflow analysis.  Records the set of
-  // live-in and live-out variables for each block.  Identifies and
-  // deletes dead instructions (primarily stores).
+  /// Basic version of live-range-end calculation.  Marks the last uses
+  /// of variables based on dataflow analysis.  Records the set of
+  /// live-in and live-out variables for each block.  Identifies and
+  /// deletes dead instructions (primarily stores).
   Liveness_Basic,
 
-  // In addition to Liveness_Basic, also calculate the complete
-  // live range for each variable in a form suitable for interference
-  // calculation and register allocation.
+  /// In addition to Liveness_Basic, also calculate the complete
+  /// live range for each variable in a form suitable for interference
+  /// calculation and register allocation.
   Liveness_Intervals
 };
 
 enum RegAllocKind {
-  RAK_Global, // full, global register allocation
-  RAK_InfOnly // allocation only for infinite-weight Variables
+  RAK_Global, ///< full, global register allocation
+  RAK_InfOnly ///< allocation only for infinite-weight Variables
 };
 
 enum VerboseItem {
@@ -221,9 +222,9 @@
 typedef uint32_t VerboseMask;
 
 enum FileType {
-  FT_Elf, // ELF .o file
-  FT_Asm, // Assembly .s file
-  FT_Iasm // "Integrated assembler" .byte-style .s file
+  FT_Elf, ///< ELF .o file
+  FT_Asm, ///< Assembly .s file
+  FT_Iasm ///< "Integrated assembler" .byte-style .s file
 };
 
 typedef llvm::raw_ostream Ostream;
@@ -233,10 +234,10 @@
 
 enum ErrorCodes { EC_None = 0, EC_Args, EC_Bitcode, EC_Translation };
 
-// Wrapper around std::error_code for allowing multiple errors to be
-// folded into one.  The current implementation keeps track of the
-// first error, which is likely to be the most useful one, and this
-// could be extended to e.g. collect a vector of errors.
+/// Wrapper around std::error_code for allowing multiple errors to be
+/// folded into one.  The current implementation keeps track of the
+/// first error, which is likely to be the most useful one, and this
+/// could be extended to e.g. collect a vector of errors.
 class ErrorCode : public std::error_code {
   ErrorCode(const ErrorCode &) = delete;
   ErrorCode &operator=(const ErrorCode &) = delete;
@@ -255,7 +256,7 @@
   bool HasError = false;
 };
 
-// Reverse range adaptors written in terms of llvm::make_range().
+/// Reverse range adaptors written in terms of llvm::make_range().
 template <typename T>
 llvm::iterator_range<typename T::const_reverse_iterator>
 reverse_range(const T &Container) {
@@ -266,7 +267,7 @@
   return llvm::make_range(Container.rbegin(), Container.rend());
 }
 
-// Options for pooling and randomization of immediates
+/// Options for pooling and randomization of immediates.
 enum RandomizeAndPoolImmediatesEnum { RPI_None, RPI_Randomize, RPI_Pool };
 
 } // end of namespace Ice
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index eca3403..37b1d58 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the writer for ELF relocatable object files.
-//
+///
+/// \file
+/// This file defines the writer for ELF relocatable object files.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceELFObjectWriter.h"
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h
index 9356ee3..0562c9b 100644
--- a/src/IceELFObjectWriter.h
+++ b/src/IceELFObjectWriter.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// Abstraction for a writer that is responsible for writing an ELF file.
-//
+///
+/// \file
+/// Abstraction for a writer that is responsible for writing an ELF file.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEELFOBJECTWRITER_H
@@ -23,26 +24,26 @@
 
 namespace Ice {
 
-// Higher level ELF object writer.  Manages section information and writes
-// the final ELF object.  The object writer will write to file the code
-// and data as it is being defined (rather than keep a copy).
-// After all definitions are written out, it will finalize the bookkeeping
-// sections and write them out.  Expected usage:
-//
-// (1) writeInitialELFHeader (invoke once)
-// (2) writeDataSection      (may be invoked multiple times, as long as
-//                            SectionSuffix is unique)
-// (3) writeFunctionCode     (must invoke once per function)
-// (4) writeConstantPool     (must invoke once per pooled primitive type)
-// (5) setUndefinedSyms      (invoke once)
-// (6) writeNonUserSections  (invoke once)
-//
-// The requirement for writeDataSection to be invoked only once can
-// be relaxed if using -fdata-sections. The requirement to invoke only once
-// without -fdata-sections is so that variables that belong to each possible
-// SectionType are contiguous in the file. With -fdata-sections, each global
-// variable is in a separate section and therefore the sections will be
-// trivially contiguous.
+/// Higher level ELF object writer.  Manages section information and writes
+/// the final ELF object.  The object writer will write to file the code
+/// and data as it is being defined (rather than keep a copy).
+/// After all definitions are written out, it will finalize the bookkeeping
+/// sections and write them out.  Expected usage:
+///
+/// (1) writeInitialELFHeader (invoke once)
+/// (2) writeDataSection      (may be invoked multiple times, as long as
+///                            SectionSuffix is unique)
+/// (3) writeFunctionCode     (must invoke once per function)
+/// (4) writeConstantPool     (must invoke once per pooled primitive type)
+/// (5) setUndefinedSyms      (invoke once)
+/// (6) writeNonUserSections  (invoke once)
+///
+/// The requirement for writeDataSection to be invoked only once can
+/// be relaxed if using -fdata-sections. The requirement to invoke only once
+/// without -fdata-sections is so that variables that belong to each possible
+/// SectionType are contiguous in the file. With -fdata-sections, each global
+/// variable is in a separate section and therefore the sections will be
+/// trivially contiguous.
 class ELFObjectWriter {
   ELFObjectWriter() = delete;
   ELFObjectWriter(const ELFObjectWriter &) = delete;
@@ -51,39 +52,39 @@
 public:
   ELFObjectWriter(GlobalContext &Ctx, ELFStreamer &Out);
 
-  // Write the initial ELF header. This is just to reserve space in the ELF
-  // file. Reserving space allows the other functions to write text
-  // and data directly to the file and get the right file offsets.
+  /// Write the initial ELF header. This is just to reserve space in the ELF
+  /// file. Reserving space allows the other functions to write text
+  /// and data directly to the file and get the right file offsets.
   void writeInitialELFHeader();
 
-  // Copy initializer data for globals to file and note the offset and size
-  // of each global's definition in the symbol table.
-  // Use the given target's RelocationKind for any relocations.
+  /// Copy initializer data for globals to file and note the offset and size
+  /// of each global's definition in the symbol table.
+  /// Use the given target's RelocationKind for any relocations.
   void writeDataSection(const VariableDeclarationList &Vars,
                         FixupKind RelocationKind,
                         const IceString &SectionSuffix);
 
-  // Copy data of a function's text section to file and note the offset of the
-  // symbol's definition in the symbol table.
-  // Copy the text fixups for use after all functions are written.
-  // The text buffer and fixups are extracted from the Assembler object.
+  /// Copy data of a function's text section to file and note the offset of the
+  /// symbol's definition in the symbol table.
+  /// Copy the text fixups for use after all functions are written.
+  /// The text buffer and fixups are extracted from the Assembler object.
   void writeFunctionCode(const IceString &FuncName, bool IsInternal,
                          const Assembler *Asm);
 
-  // Queries the GlobalContext for constant pools of the given type
-  // and writes out read-only data sections for those constants. This also
-  // fills the symbol table with labels for each constant pool entry.
+  /// Queries the GlobalContext for constant pools of the given type
+  /// and writes out read-only data sections for those constants. This also
+  /// fills the symbol table with labels for each constant pool entry.
   template <typename ConstType> void writeConstantPool(Type Ty);
 
-  // Populate the symbol table with a list of external/undefined symbols.
+  /// Populate the symbol table with a list of external/undefined symbols.
   void setUndefinedSyms(const ConstantList &UndefSyms);
 
-  // Do final layout and write out the rest of the object file.
-  // Finally, patch up the initial ELF header with the final info.
+  /// Do final layout and write out the rest of the object file.
+  /// Finally, patch up the initial ELF header with the final info.
   void writeNonUserSections();
 
-  // Which type of ELF section a global variable initializer belongs to.
-  // This is used as an array index so should start at 0 and be contiguous.
+  /// Which type of ELF section a global variable initializer belongs to.
+  /// This is used as an array index so should start at 0 and be contiguous.
   enum SectionType { ROData = 0, Data, BSS, NumSectionTypes };
 
 private:
@@ -116,48 +117,48 @@
                    Elf64_Xword ShFlags, Elf64_Xword ShAddralign,
                    Elf64_Xword ShEntsize);
 
-  // Create a relocation section, given the related section
-  // (e.g., .text, .data., .rodata).
+  /// Create a relocation section, given the related section
+  /// (e.g., .text, .data, .rodata).
   ELFRelocationSection *
   createRelocationSection(const ELFSection *RelatedSection);
 
-  // Align the file position before writing out a section's data,
-  // and return the position of the file.
+  /// Align the file position before writing out a section's data,
+  /// and return the position of the file.
   Elf64_Off alignFileOffset(Elf64_Xword Align);
 
-  // Assign an ordering / section numbers to each section.
-  // Fill in other information that is only known near the end
-  // (such as the size, if it wasn't already incrementally updated).
-  // This then collects all sections in the decided order, into one vector,
-  // for conveniently writing out all of the section headers.
+  /// Assign an ordering / section numbers to each section.
+  /// Fill in other information that is only known near the end
+  /// (such as the size, if it wasn't already incrementally updated).
+  /// This then collects all sections in the decided order, into one vector,
+  /// for conveniently writing out all of the section headers.
   void assignSectionNumbersInfo(SectionList &AllSections);
 
-  // This function assigns .foo and .rel.foo consecutive section numbers.
-  // It also sets the relocation section's sh_info field to the related
-  // section's number.
+  /// This function assigns .foo and .rel.foo consecutive section numbers.
+  /// It also sets the relocation section's sh_info field to the related
+  /// section's number.
   template <typename UserSectionList>
   void assignRelSectionNumInPairs(SizeT &CurSectionNumber,
                                   UserSectionList &UserSections,
                                   RelSectionList &RelSections,
                                   SectionList &AllSections);
 
-  // Link the relocation sections to the symbol table.
+  /// Link the relocation sections to the symbol table.
   void assignRelLinkNum(SizeT SymTabNumber, RelSectionList &RelSections);
 
-  // Helper function for writeDataSection. Writes a data section of type
-  // SectionType, given the global variables Vars belonging to that SectionType.
+  /// Helper function for writeDataSection. Writes a data section of type
+  /// SectionType, given the global variables Vars belonging to that SectionType.
   void writeDataOfType(SectionType SectionType,
                        const VariableDeclarationList &Vars,
                        FixupKind RelocationKind,
                        const IceString &SectionSuffix);
 
-  // Write the final relocation sections given the final symbol table.
-  // May also be able to seek around the file and resolve function calls
-  // that are for functions within the same section.
+  /// Write the final relocation sections given the final symbol table.
+  /// May also be able to seek around the file and resolve function calls
+  /// that are for functions within the same section.
   void writeAllRelocationSections();
   void writeRelocationSections(RelSectionList &RelSections);
 
-  // Write the ELF file header with the given information about sections.
+  /// Write the ELF file header with the given information about sections.
   template <bool IsELF64>
   void writeELFHeaderInternal(Elf64_Off SectionHeaderOffset,
                               SizeT SectHeaderStrIndex, SizeT NumSections);
diff --git a/src/IceELFSection.cpp b/src/IceELFSection.cpp
index 8ca5b2d..7893354 100644
--- a/src/IceELFSection.cpp
+++ b/src/IceELFSection.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines how ELF sections are represented.
-//
+///
+/// \file
+/// This file defines how ELF sections are represented.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceELFSection.h"
diff --git a/src/IceELFSection.h b/src/IceELFSection.h
index a79a9fb..0ee3f03 100644
--- a/src/IceELFSection.h
+++ b/src/IceELFSection.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// Representation of ELF sections.
-//
+///
+/// \file
+/// Representation of ELF sections.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEELFSECTION_H
@@ -26,7 +27,7 @@
 class ELFStreamer;
 class ELFStringTableSection;
 
-// Base representation of an ELF section.
+/// Base representation of an ELF section.
 class ELFSection {
   ELFSection() = delete;
   ELFSection(const ELFSection &) = delete;
@@ -35,15 +36,15 @@
 public:
   virtual ~ELFSection() = default;
 
-  // Sentinel value for a section number/index for before the final
-  // section index is actually known. The dummy NULL section will be assigned
-  // number 0, and it is referenced by the dummy 0-th symbol in the symbol
-  // table, so use max() instead of 0.
+  /// Sentinel value for a section number/index for before the final
+  /// section index is actually known. The dummy NULL section will be assigned
+  /// number 0, and it is referenced by the dummy 0-th symbol in the symbol
+  /// table, so use max() instead of 0.
   enum { NoSectionNumber = std::numeric_limits<SizeT>::max() };
 
-  // Constructs an ELF section, filling in fields that will be known
-  // once the *type* of section is decided.  Other fields may be updated
-  // incrementally or only after the program is completely defined.
+  /// Constructs an ELF section, filling in fields that will be known
+  /// once the *type* of section is decided.  Other fields may be updated
+  /// incrementally or only after the program is completely defined.
   ELFSection(const IceString &Name, Elf64_Word ShType, Elf64_Xword ShFlags,
              Elf64_Xword ShAddralign, Elf64_Xword ShEntsize)
       : Name(Name), Header() {
@@ -53,7 +54,7 @@
     Header.sh_entsize = ShEntsize;
   }
 
-  // Set the section number/index after it is finally known.
+  /// Set the section number/index after it is finally known.
   void setNumber(SizeT N) {
     // Should only set the number once: from NoSectionNumber -> N.
     assert(Number == NoSectionNumber);
@@ -79,24 +80,24 @@
 
   Elf64_Xword getSectionAlign() const { return Header.sh_addralign; }
 
-  // Write the section header out with the given streamer.
+  /// Write the section header out with the given streamer.
   template <bool IsELF64> void writeHeader(ELFStreamer &Str);
 
 protected:
-  // Name of the section in convenient string form (instead of a index
-  // into the Section Header String Table, which is not known till later).
+  /// Name of the section in convenient string form (instead of an index
+  /// into the Section Header String Table, which is not known till later).
   const IceString Name;
 
   // The fields of the header. May only be partially initialized, but should
   // be fully initialized before writing.
   Elf64_Shdr Header;
 
-  // The number of the section after laying out sections.
+  /// The number of the section after laying out sections.
   SizeT Number = NoSectionNumber;
 };
 
-// Models text/code sections. Code is written out incrementally and the
-// size of the section is then updated incrementally.
+/// Models text/code sections. Code is written out incrementally and the
+/// size of the section is then updated incrementally.
 class ELFTextSection : public ELFSection {
   ELFTextSection() = delete;
   ELFTextSection(const ELFTextSection &) = delete;
@@ -108,9 +109,9 @@
   void appendData(ELFStreamer &Str, const llvm::StringRef MoreData);
 };
 
-// Models data/rodata sections. Data is written out incrementally and the
-// size of the section is then updated incrementally.
-// Some rodata sections may have fixed entsize and duplicates may be mergeable.
+/// Models data/rodata sections. Data is written out incrementally and the
+/// size of the section is then updated incrementally.
+/// Some rodata sections may have fixed entsize and duplicates may be mergeable.
 class ELFDataSection : public ELFSection {
   ELFDataSection() = delete;
   ELFDataSection(const ELFDataSection &) = delete;
@@ -126,22 +127,22 @@
   void appendRelocationOffset(ELFStreamer &Str, bool IsRela,
                               RelocOffsetT RelocOffset);
 
-  // Pad the next section offset for writing data elements to the requested
-  // alignment. If the section is NOBITS then do not actually write out
-  // the padding and only update the section size.
+  /// Pad the next section offset for writing data elements to the requested
+  /// alignment. If the section is NOBITS then do not actually write out
+  /// the padding and only update the section size.
   void padToAlignment(ELFStreamer &Str, Elf64_Xword Align);
 };
 
-// Model of ELF symbol table entries. Besides keeping track of the fields
-// required for an elf symbol table entry it also tracks the number that
-// represents the symbol's final index in the symbol table.
+/// Model of ELF symbol table entries. Besides keeping track of the fields
+/// required for an elf symbol table entry it also tracks the number that
+/// represents the symbol's final index in the symbol table.
 struct ELFSym {
   Elf64_Sym Sym;
   ELFSection *Section;
   SizeT Number;
 
-  // Sentinel value for symbols that haven't been assigned a number yet.
-  // The dummy 0-th symbol will be assigned number 0, so don't use that.
+  /// Sentinel value for symbols that haven't been assigned a number yet.
+  /// The dummy 0-th symbol will be assigned number 0, so don't use that.
   enum { UnknownNumber = std::numeric_limits<SizeT>::max() };
 
   void setNumber(SizeT N) {
@@ -155,8 +156,8 @@
   }
 };
 
-// Models a symbol table. Symbols may be added up until updateIndices is
-// called. At that point the indices of each symbol will be finalized.
+/// Models a symbol table. Symbols may be added up until updateIndices is
+/// called. At that point the indices of each symbol will be finalized.
 class ELFSymbolTableSection : public ELFSection {
   ELFSymbolTableSection() = delete;
   ELFSymbolTableSection(const ELFSymbolTableSection &) = delete;
@@ -169,16 +170,16 @@
       : ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize),
         NullSymbol(nullptr) {}
 
-  // Create initial entry for a symbol when it is defined.
-  // Each entry should only be defined once.
-  // We might want to allow Name to be a dummy name initially, then
-  // get updated to the real thing, since Data initializers are read
-  // before the bitcode's symbol table is read.
+  /// Create initial entry for a symbol when it is defined.
+  /// Each entry should only be defined once.
+  /// We might want to allow Name to be a dummy name initially, then
+  /// get updated to the real thing, since Data initializers are read
+  /// before the bitcode's symbol table is read.
   void createDefinedSym(const IceString &Name, uint8_t Type, uint8_t Binding,
                         ELFSection *Section, RelocOffsetT Offset, SizeT Size);
 
-  // Note that a symbol table entry needs to be created for the given
-  // symbol because it is undefined.
+  /// Note that a symbol table entry needs to be created for the given
+  /// symbol because it is undefined.
   void noteUndefinedSym(const IceString &Name, ELFSection *NullSection);
 
   const ELFSym *findSymbol(const IceString &Name) const;
@@ -212,7 +213,7 @@
   SymMap GlobalSymbols;
 };
 
-// Models a relocation section.
+/// Models a relocation section.
 class ELFRelocationSection : public ELFSection {
   ELFRelocationSection() = delete;
   ELFRelocationSection(const ELFRelocationSection &) = delete;
@@ -230,11 +231,11 @@
     RelatedSection = Section;
   }
 
-  // Track additional relocations which start out relative to offset 0,
-  // but should be adjusted to be relative to BaseOff.
+  /// Track additional relocations which start out relative to offset 0,
+  /// but should be adjusted to be relative to BaseOff.
   void addRelocations(RelocOffsetT BaseOff, const FixupRefList &FixupRefs);
 
-  // Track a single additional relocation.
+  /// Track a single additional relocation.
   void addRelocation(const AssemblerFixup &Fixup) { Fixups.push_back(Fixup); }
 
   size_t getSectionDataSize() const;
@@ -250,12 +251,12 @@
   FixupList Fixups;
 };
 
-// Models a string table.  The user will build the string table by
-// adding strings incrementally.  At some point, all strings should be
-// known and doLayout() should be called. After that, no other
-// strings may be added.  However, the final offsets of the strings
-// can be discovered and used to fill out section headers and symbol
-// table entries.
+/// Models a string table.  The user will build the string table by
+/// adding strings incrementally.  At some point, all strings should be
+/// known and doLayout() should be called. After that, no other
+/// strings may be added.  However, the final offsets of the strings
+/// can be discovered and used to fill out section headers and symbol
+/// table entries.
 class ELFStringTableSection : public ELFSection {
   ELFStringTableSection() = delete;
   ELFStringTableSection(const ELFStringTableSection &) = delete;
@@ -264,18 +265,18 @@
 public:
   using ELFSection::ELFSection;
 
-  // Add a string to the table, in preparation for final layout.
+  /// Add a string to the table, in preparation for final layout.
   void add(const IceString &Str);
 
-  // Finalizes the layout of the string table and fills in the section Data.
+  /// Finalizes the layout of the string table and fills in the section Data.
   void doLayout();
 
-  // The first byte of the string table should be \0, so it is an
-  // invalid index.  Indices start out as unknown until layout is complete.
+  /// The first byte of the string table should be \0, so it is an
+  /// invalid index.  Indices start out as unknown until layout is complete.
   enum { UnknownIndex = 0 };
 
-  // Grabs the final index of a string after layout. Returns UnknownIndex
-  // if the string's index is not found.
+  /// Grabs the final index of a string after layout. Returns UnknownIndex
+  /// if the string's index is not found.
   size_t getIndex(const IceString &Str) const;
 
   llvm::StringRef getSectionData() const {
@@ -289,19 +290,19 @@
 private:
   bool isLaidOut() const { return !StringData.empty(); }
 
-  // Strings can share a string table entry if they share the same
-  // suffix.  E.g., "pop" and "lollipop" can both use the characters
-  // in "lollipop", but "pops" cannot, and "unpop" cannot either.
-  // Though, "pop", "lollipop", and "unpop" share "pop" as the suffix,
-  // "pop" can only share the characters with one of them.
+  /// Strings can share a string table entry if they share the same
+  /// suffix.  E.g., "pop" and "lollipop" can both use the characters
+  /// in "lollipop", but "pops" cannot, and "unpop" cannot either.
+  /// Though, "pop", "lollipop", and "unpop" share "pop" as the suffix,
+  /// "pop" can only share the characters with one of them.
   struct SuffixComparator {
     bool operator()(const IceString &StrA, const IceString &StrB) const;
   };
 
   typedef std::map<IceString, size_t, SuffixComparator> StringToIndexType;
 
-  // Track strings to their index.  Index will be UnknownIndex if not
-  // yet laid out.
+  /// Track strings to their index.  Index will be UnknownIndex if not
+  /// yet laid out.
   StringToIndexType StringToIndexMap;
 
   typedef std::vector<uint8_t> RawDataType;
diff --git a/src/IceELFStreamer.h b/src/IceELFStreamer.h
index 50e4dd7..93051b9 100644
--- a/src/IceELFStreamer.h
+++ b/src/IceELFStreamer.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// Interface for serializing bits for common ELF types (words, extended words,
-// etc.), based on the ELF Class.
-//
+///
+/// \file
+/// Interface for serializing bits for common ELF types (words, extended words,
+/// etc.), based on the ELF Class.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEELFSTREAMER_H
@@ -19,8 +20,8 @@
 
 namespace Ice {
 
-// Low level writer that can that can handle ELFCLASS32/64.
-// Little endian only for now.
+/// Low level writer that can handle ELFCLASS32/64.
+/// Little endian only for now.
 class ELFStreamer {
   ELFStreamer() = delete;
   ELFStreamer(const ELFStreamer &) = delete;
diff --git a/src/IceFixups.cpp b/src/IceFixups.cpp
index 9301910..de00d23 100644
--- a/src/IceFixups.cpp
+++ b/src/IceFixups.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the AssemblerFixup class, a very basic
-// target-independent representation of a fixup or relocation.
-//
+///
+/// \file
+/// This file implements the AssemblerFixup class, a very basic
+/// target-independent representation of a fixup or relocation.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceFixups.h"
diff --git a/src/IceFixups.h b/src/IceFixups.h
index 9ec72c3..f040918 100644
--- a/src/IceFixups.h
+++ b/src/IceFixups.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares generic fixup types.
-//
+///
+/// \file
+/// This file declares generic fixup types.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEFIXUPS_H
@@ -18,12 +19,12 @@
 
 namespace Ice {
 
-// Each target and container format has a different namespace of relocations.
-// This holds the specific target+container format's relocation number.
+/// Each target and container format has a different namespace of relocations.
+/// This holds the specific target+container format's relocation number.
 typedef uint32_t FixupKind;
 
-// Assembler fixups are positions in generated code/data that hold relocation
-// information that needs to be processed before finalizing the code/data.
+/// Assembler fixups are positions in generated code/data that hold relocation
+/// information that needs to be processed before finalizing the code/data.
 struct AssemblerFixup {
   AssemblerFixup &operator=(const AssemblerFixup &) = delete;
 
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 3473b8c..51ca3f0 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines aspects of the compilation that persist across
-// multiple functions.
-//
+///
+/// \file
+/// This file defines aspects of the compilation that persist across
+/// multiple functions.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceGlobalContext.h"
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 1e23a36..d19249d 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares aspects of the compilation that persist across
-// multiple functions.
-//
+///
+/// \file
+/// This file declares aspects of the compilation that persist across
+/// multiple functions.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
@@ -38,7 +39,7 @@
 class EmitterWorkItem;
 class FuncSigType;
 
-// LockedPtr is a way to provide automatically locked access to some object.
+/// LockedPtr is a way to provide automatically locked access to some object.
 template <typename T> class LockedPtr {
   LockedPtr() = delete;
   LockedPtr(const LockedPtr &) = delete;
@@ -65,7 +66,7 @@
   GlobalContext(const GlobalContext &) = delete;
   GlobalContext &operator=(const GlobalContext &) = delete;
 
-  // CodeStats collects rudimentary statistics during translation.
+  /// CodeStats collects rudimentary statistics during translation.
   class CodeStats {
     CodeStats(const CodeStats &) = delete;
     CodeStats &operator=(const CodeStats &) = default;
@@ -102,18 +103,18 @@
     std::array<uint32_t, CS_NUM> Stats;
   };
 
-  // TimerList is a vector of TimerStack objects, with extra methods
-  // to initialize and merge these vectors.
+  /// TimerList is a vector of TimerStack objects, with extra methods
+  /// to initialize and merge these vectors.
   class TimerList : public std::vector<TimerStack> {
     TimerList(const TimerList &) = delete;
     TimerList &operator=(const TimerList &) = delete;
 
   public:
     TimerList() = default;
-    // initInto() initializes a target list of timers based on the
-    // current list.  In particular, it creates the same number of
-    // timers, in the same order, with the same names, but initially
-    // empty of timing data.
+    /// initInto() initializes a target list of timers based on the
+    /// current list.  In particular, it creates the same number of
+    /// timers, in the same order, with the same names, but initially
+    /// empty of timing data.
     void initInto(TimerList &Dest) const {
       if (!BuildDefs::dump())
         return;
@@ -135,8 +136,8 @@
     }
   };
 
-  // ThreadContext contains thread-local data.  This data can be
-  // combined/reduced as needed after all threads complete.
+  /// ThreadContext contains thread-local data.  This data can be
+  /// combined/reduced as needed after all threads complete.
   class ThreadContext {
     ThreadContext(const ThreadContext &) = delete;
     ThreadContext &operator=(const ThreadContext &) = delete;
@@ -149,24 +150,24 @@
   };
 
 public:
-  // The dump stream is a log stream while emit is the stream code
-  // is emitted to. The error stream is strictly for logging errors.
+  /// The dump stream is a log stream while emit is the stream code
+  /// is emitted to. The error stream is strictly for logging errors.
   GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
                 ELFStreamer *ELFStreamer, const ClFlags &Flags);
   ~GlobalContext();
 
-  //
-  // The dump, error, and emit streams need to be used by only one
-  // thread at a time.  This is done by exclusively reserving the
-  // streams via lockStr() and unlockStr().  The OstreamLocker class
-  // can be used to conveniently manage this.
-  //
-  // The model is that a thread grabs the stream lock, then does an
-  // arbitrary amount of work during which far-away callees may grab
-  // the stream and do something with it, and finally the thread
-  // releases the stream lock.  This allows large chunks of output to
-  // be dumped or emitted without risking interleaving from multiple
-  // threads.
+  ///
+  /// The dump, error, and emit streams need to be used by only one
+  /// thread at a time.  This is done by exclusively reserving the
+  /// streams via lockStr() and unlockStr().  The OstreamLocker class
+  /// can be used to conveniently manage this.
+  ///
+  /// The model is that a thread grabs the stream lock, then does an
+  /// arbitrary amount of work during which far-away callees may grab
+  /// the stream and do something with it, and finally the thread
+  /// releases the stream lock.  This allows large chunks of output to
+  /// be dumped or emitted without risking interleaving from multiple
+  /// threads.
   void lockStr() { StrLock.lock(); }
   void unlockStr() { StrLock.unlock(); }
   Ostream &getStrDump() { return *StrDump; }
@@ -177,10 +178,10 @@
     return LockedPtr<ErrorCode>(&ErrorStatus, &ErrorStatusLock);
   }
 
-  // When emitting assembly, we allow a string to be prepended to
-  // names of translated functions.  This makes it easier to create an
-  // execution test against a reference translator like llc, with both
-  // translators using the same bitcode as input.
+  /// When emitting assembly, we allow a string to be prepended to
+  /// names of translated functions.  This makes it easier to create an
+  /// execution test against a reference translator like llc, with both
+  /// translators using the same bitcode as input.
   IceString mangleName(const IceString &Name) const;
 
   // Manage Constants.
@@ -194,18 +195,18 @@
   Constant *getConstantInt64(int64_t ConstantInt64);
   Constant *getConstantFloat(float Value);
   Constant *getConstantDouble(double Value);
-  // Returns a symbolic constant.
+  /// Returns a symbolic constant.
   Constant *getConstantSym(RelocOffsetT Offset, const IceString &Name,
                            bool SuppressMangling);
   Constant *getConstantExternSym(const IceString &Name);
-  // Returns an undef.
+  /// Returns an undef.
   Constant *getConstantUndef(Type Ty);
-  // Returns a zero value.
+  /// Returns a zero value.
   Constant *getConstantZero(Type Ty);
-  // getConstantPool() returns a copy of the constant pool for
-  // constants of a given type.
+  /// getConstantPool() returns a copy of the constant pool for
+  /// constants of a given type.
   ConstantList getConstantPool(Type Ty);
-  // Returns a copy of the list of external symbols.
+  /// Returns a copy of the list of external symbols.
   ConstantList getConstantExternSyms();
 
   const ClFlags &getFlags() const { return Flags; }
@@ -214,11 +215,11 @@
     return getFlags().getDisableIRGeneration();
   }
 
-  // Allocate data of type T using the global allocator. We allow entities
-  // allocated from this global allocator to be either trivially or
-  // non-trivially destructible. We optimize the case when T is trivially
-  // destructible by not registering a destructor. Destructors will be invoked
-  // during GlobalContext destruction in the reverse object creation order.
+  /// Allocate data of type T using the global allocator. We allow entities
+  /// allocated from this global allocator to be either trivially or
+  /// non-trivially destructible. We optimize the case when T is trivially
+  /// destructible by not registering a destructor. Destructors will be invoked
+  /// during GlobalContext destruction in the reverse object creation order.
   template <typename T>
   typename std::enable_if<std::is_trivially_destructible<T>::value, T>::type *
   allocate() {
@@ -241,7 +242,7 @@
 
   ELFObjectWriter *getObjectWriter() const { return ObjectWriter.get(); }
 
-  // Reset stats at the beginning of a function.
+  /// Reset stats at the beginning of a function.
   void resetStats() {
     if (BuildDefs::dump())
       ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset();
@@ -283,7 +284,7 @@
     Tls->StatsCumulative.update(CodeStats::CS_NumFills);
   }
 
-  // Number of Randomized or Pooled Immediates
+  /// Number of Randomized or Pooled Immediates
   void statsUpdateRPImms() {
     if (!getFlags().getDumpStats())
       return;
@@ -292,44 +293,44 @@
     Tls->StatsCumulative.update(CodeStats::CS_NumRPImms);
   }
 
-  // These are predefined TimerStackIdT values.
+  /// These are predefined TimerStackIdT values.
   enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
 
-  // newTimerStackID() creates a new TimerStack in the global space.
-  // It does not affect any TimerStack objects in TLS.
+  /// newTimerStackID() creates a new TimerStack in the global space.
+  /// It does not affect any TimerStack objects in TLS.
   TimerStackIdT newTimerStackID(const IceString &Name);
-  // dumpTimers() dumps the global timer data.  As such, one probably
-  // wants to call mergeTimerStacks() as a prerequisite.
+  /// dumpTimers() dumps the global timer data.  As such, one probably
+  /// wants to call mergeTimerStacks() as a prerequisite.
   void dumpTimers(TimerStackIdT StackID = TSK_Default,
                   bool DumpCumulative = true);
-  // The following methods affect only the calling thread's TLS timer
-  // data.
+  /// The following methods affect only the calling thread's TLS timer
+  /// data.
   TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
   void pushTimer(TimerIdT ID, TimerStackIdT StackID);
   void popTimer(TimerIdT ID, TimerStackIdT StackID);
   void resetTimer(TimerStackIdT StackID);
   void setTimerName(TimerStackIdT StackID, const IceString &NewName);
 
-  // This is the first work item sequence number that the parser
-  // produces, and correspondingly the first sequence number that the
-  // emitter thread will wait for.  Start numbering at 1 to leave room
-  // for a sentinel, in case e.g. we wish to inject items with a
-  // special sequence number that may be executed out of order.
+  /// This is the first work item sequence number that the parser
+  /// produces, and correspondingly the first sequence number that the
+  /// emitter thread will wait for.  Start numbering at 1 to leave room
+  /// for a sentinel, in case e.g. we wish to inject items with a
+  /// special sequence number that may be executed out of order.
   static uint32_t getFirstSequenceNumber() { return 1; }
-  // Adds a newly parsed and constructed function to the Cfg work
-  // queue.  Notifies any idle workers that a new function is
-  // available for translating.  May block if the work queue is too
-  // large, in order to control memory footprint.
+  /// Adds a newly parsed and constructed function to the Cfg work
+  /// queue.  Notifies any idle workers that a new function is
+  /// available for translating.  May block if the work queue is too
+  /// large, in order to control memory footprint.
   void optQueueBlockingPush(std::unique_ptr<Cfg> Func);
-  // Takes a Cfg from the work queue for translating.  May block if
-  // the work queue is currently empty.  Returns nullptr if there is
-  // no more work - the queue is empty and either end() has been
-  // called or the Sequential flag was set.
+  /// Takes a Cfg from the work queue for translating.  May block if
+  /// the work queue is currently empty.  Returns nullptr if there is
+  /// no more work - the queue is empty and either end() has been
+  /// called or the Sequential flag was set.
   std::unique_ptr<Cfg> optQueueBlockingPop();
-  // Notifies that no more work will be added to the work queue.
+  /// Notifies that no more work will be added to the work queue.
   void optQueueNotifyEnd() { OptQ.notifyEnd(); }
 
-  // Emit file header for output file.
+  /// Emit file header for output file.
   void emitFileHeader();
 
   void lowerConstants();
@@ -394,44 +395,44 @@
     }
   }
 
-  // Translation thread startup routine.
+  /// Translation thread startup routine.
   void translateFunctionsWrapper(ThreadContext *MyTLS) {
     ICE_TLS_SET_FIELD(TLS, MyTLS);
     translateFunctions();
   }
-  // Translate functions from the Cfg queue until the queue is empty.
+  /// Translate functions from the Cfg queue until the queue is empty.
   void translateFunctions();
 
-  // Emitter thread startup routine.
+  /// Emitter thread startup routine.
   void emitterWrapper(ThreadContext *MyTLS) {
     ICE_TLS_SET_FIELD(TLS, MyTLS);
     emitItems();
   }
-  // Emit functions and global initializers from the emitter queue
-  // until the queue is empty.
+  /// Emit functions and global initializers from the emitter queue
+  /// until the queue is empty.
   void emitItems();
 
-  // Uses DataLowering to lower Globals. Side effects:
-  //  - discards the initializer list for the global variable in Globals.
-  //  - clears the Globals array.
+  /// Uses DataLowering to lower Globals. Side effects:
+  ///  - discards the initializer list for the global variable in Globals.
+  ///  - clears the Globals array.
   void lowerGlobals(const IceString &SectionSuffix);
 
-  // Lowers the profile information.
+  /// Lowers the profile information.
   void lowerProfileData();
 
-  // Utility function to match a symbol name against a match string.
-  // This is used in a few cases where we want to take some action on
-  // a particular function or symbol based on a command-line argument,
-  // such as changing the verbose level for a particular function.  An
-  // empty Match argument means match everything.  Returns true if
-  // there is a match.
+  /// Utility function to match a symbol name against a match string.
+  /// This is used in a few cases where we want to take some action on
+  /// a particular function or symbol based on a command-line argument,
+  /// such as changing the verbose level for a particular function.  An
+  /// empty Match argument means match everything.  Returns true if
+  /// there is a match.
   static bool matchSymbolName(const IceString &SymbolName,
                               const IceString &Match) {
     return Match.empty() || Match == SymbolName;
   }
 
-  // Return the randomization cookie for diversification.
-  // Initialize the cookie if necessary
+  /// Return the randomization cookie for diversification.
+  /// Initialize the cookie if necessary
   uint32_t getRandomizationCookie() const { return RandomizationCookie; }
 
 private:
@@ -470,12 +471,12 @@
   TimerList Timers;
 
   ICE_CACHELINE_BOUNDARY;
-  // StrLock is a global lock on the dump and emit output streams.
+  /// StrLock is a global lock on the dump and emit output streams.
   typedef std::mutex StrLockType;
   StrLockType StrLock;
-  Ostream *StrDump;  // Stream for dumping / diagnostics
-  Ostream *StrEmit;  // Stream for code emission
-  Ostream *StrError; // Stream for logging errors.
+  Ostream *StrDump;  ///< Stream for dumping / diagnostics
+  Ostream *StrEmit;  ///< Stream for code emission
+  Ostream *StrError; ///< Stream for logging errors.
 
   ICE_CACHELINE_BOUNDARY;
 
@@ -490,8 +491,8 @@
   // emitItems(), or in IceCompiler::run before the compilation is over.)
   // TODO(jpp): move to EmitterContext.
   std::unique_ptr<TargetDataLowering> DataLowering;
-  // If !HasEmittedCode, SubZero will accumulate all Globals (which are "true"
-  // program global variables) until the first code WorkItem is seen.
+  /// If !HasSeenCode, SubZero will accumulate all Globals (which are "true"
+  /// program global variables) until the first code WorkItem is seen.
   // TODO(jpp): move to EmitterContext.
   bool HasSeenCode = false;
   // TODO(jpp): move to EmitterContext.
@@ -548,9 +549,9 @@
   static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
 };
 
-// Helper class to push and pop a timer marker.  The constructor
-// pushes a marker, and the destructor pops it.  This is for
-// convenient timing of regions of code.
+/// Helper class to push and pop a timer marker.  The constructor
+/// pushes a marker, and the destructor pops it.  This is for
+/// convenient timing of regions of code.
 class TimerMarker {
   TimerMarker() = delete;
   TimerMarker(const TimerMarker &) = delete;
@@ -585,8 +586,8 @@
   bool Active = false;
 };
 
-// Helper class for locking the streams and then automatically
-// unlocking them.
+/// Helper class for locking the streams and then automatically
+/// unlocking them.
 class OstreamLocker {
 private:
   OstreamLocker() = delete;
diff --git a/src/IceGlobalInits.cpp b/src/IceGlobalInits.cpp
index 1b5085c..c95456c 100644
--- a/src/IceGlobalInits.cpp
+++ b/src/IceGlobalInits.cpp
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the notion of function declarations, global
-// variable declarations, and the corresponding variable initializers
-// in Subzero.
-//
+///
+/// \file
+/// This file implements the notion of function declarations, global
+/// variable declarations, and the corresponding variable initializers
+/// in Subzero.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceGlobalInits.h"
diff --git a/src/IceGlobalInits.h b/src/IceGlobalInits.h
index 08f4fe3..fb86f7c 100644
--- a/src/IceGlobalInits.h
+++ b/src/IceGlobalInits.h
@@ -6,12 +6,13 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the representation of function declarations,
-// global variable declarations, and the corresponding variable
-// initializers in Subzero. Global variable initializers are
-// represented as a sequence of simple initializers.
-//
+///
+/// \file
+/// This file declares the representation of function declarations,
+/// global variable declarations, and the corresponding variable
+/// initializers in Subzero. Global variable initializers are
+/// represented as a sequence of simple initializers.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEGLOBALINITS_H
@@ -79,8 +80,8 @@
   /// Returns true if when emitting names, we should suppress mangling.
   virtual bool getSuppressMangling() const = 0;
 
-  // Mangles name for cross tests, unless external and not defined locally
-  // (so that relocations accross pnacl-sz and pnacl-llc will work).
+  /// Mangles name for cross tests, unless external and not defined locally
+  /// (so that relocations across pnacl-sz and pnacl-llc will work).
   virtual IceString mangleName(GlobalContext *Ctx) const {
     return getSuppressMangling() ? Name : Ctx->mangleName(Name);
   }
@@ -95,8 +96,8 @@
   llvm::GlobalValue::LinkageTypes Linkage;
 };
 
-// Models a function declaration. This includes the type signature of
-// the function, its calling conventions, and its linkage.
+/// Models a function declaration. This includes the type signature of
+/// the function, its calling conventions, and its linkage.
 class FunctionDeclaration : public GlobalDeclaration {
   FunctionDeclaration() = delete;
   FunctionDeclaration(const FunctionDeclaration &) = delete;
@@ -113,7 +114,7 @@
   }
   const FuncSigType &getSignature() const { return Signature; }
   llvm::CallingConv::ID getCallingConv() const { return CallingConv; }
-  // isProto implies that there isn't a (local) definition for the function.
+  /// isProto implies that there isn't a (local) definition for the function.
   bool isProto() const { return IsProto; }
   static bool classof(const GlobalDeclaration *Addr) {
     return Addr->getKind() == FunctionDeclarationKind;
@@ -169,7 +170,7 @@
     const InitializerKind Kind;
   };
 
-  // Models the data in a data initializer.
+  /// Models the data in a data initializer.
   typedef std::vector<char> DataVecType;
 
   /// Defines a sequence of byte values as a data initializer.
@@ -205,7 +206,7 @@
         Contents[i] = Str[i];
     }
 
-    // The byte contents of the data initializer.
+    /// The byte contents of the data initializer.
     DataVecType Contents;
   };
 
@@ -230,7 +231,7 @@
     explicit ZeroInitializer(SizeT Size)
         : Initializer(ZeroInitializerKind), Size(Size) {}
 
-    // The number of bytes to be zero initialized.
+    /// The number of bytes to be zero initialized.
     SizeT Size;
   };
 
@@ -262,7 +263,7 @@
           Offset(Offset) {} // The global declaration used in the relocation.
 
     const GlobalDeclaration *Declaration;
-    // The offset to add to the relocation.
+    /// The offset to add to the relocation.
     const RelocOffsetT Offset;
   };
 
@@ -324,14 +325,14 @@
   void discardInitializers() { Initializers = nullptr; }
 
 private:
-  // list of initializers for the declared variable.
+  /// List of initializers for the declared variable.
   std::unique_ptr<InitializerListType> Initializers;
   bool HasInitializer;
-  // The alignment of the declared variable.
+  /// The alignment of the declared variable.
   uint32_t Alignment;
-  // True if a declared (global) constant.
+  /// True if a declared (global) constant.
   bool IsConstant;
-  // If set to true, force getSuppressMangling() to return true.
+  /// If set to true, force getSuppressMangling() to return true.
   bool ForceSuppressMangling;
 
   VariableDeclaration()
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 317232a..ecfb1b6 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Inst class, primarily the various
-// subclass constructors and dump routines.
-//
+///
+/// \file
+/// This file implements the Inst class, primarily the various
+/// subclass constructors and dump routines.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceInst.h"
diff --git a/src/IceInst.h b/src/IceInst.h
index 7aad054..cfa6dd3 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the Inst class and its target-independent
-// subclasses, which represent the high-level Vanilla ICE instructions
-// and map roughly 1:1 to LLVM instructions.
-//
+///
+/// \file
+/// This file declares the Inst class and its target-independent
+/// subclasses, which represent the high-level Vanilla ICE instructions
+/// and map roughly 1:1 to LLVM instructions.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINST_H
@@ -31,10 +32,10 @@
 
 namespace Ice {
 
-// Base instruction class for ICE.  Inst has two subclasses:
-// InstHighLevel and InstTarget.  High-level ICE instructions inherit
-// from InstHighLevel, and low-level (target-specific) ICE
-// instructions inherit from InstTarget.
+/// Base instruction class for ICE.  Inst has two subclasses:
+/// InstHighLevel and InstTarget.  High-level ICE instructions inherit
+/// from InstHighLevel, and low-level (target-specific) ICE
+/// instructions inherit from InstTarget.
 class Inst : public llvm::ilist_node<Inst> {
   Inst() = delete;
   Inst(const Inst &) = delete;
@@ -105,8 +106,8 @@
   bool isLastUse(const Operand *Src) const;
   void spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn);
 
-  // Returns a list of out-edges corresponding to a terminator
-  // instruction, which is the last instruction of the block.
+  /// Returns a list of out-edges corresponding to a terminator
+  /// instruction, which is the last instruction of the block.
   virtual NodeList getTerminatorEdges() const {
     // All valid terminator instructions override this method.  For
     // the default implementation, we assert in case some CfgNode
@@ -116,10 +117,10 @@
     return NodeList();
   }
   virtual bool isUnconditionalBranch() const { return false; }
-  // If the instruction is a branch-type instruction with OldNode as a
-  // target, repoint it to NewNode and return true, otherwise return
-  // false.  Only repoint one instance, even if the instruction has
-  // multiple instances of OldNode as a target.
+  /// If the instruction is a branch-type instruction with OldNode as a
+  /// target, repoint it to NewNode and return true, otherwise return
+  /// false.  Only repoint one instance, even if the instruction has
+  /// multiple instances of OldNode as a target.
   virtual bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) {
     (void)OldNode;
     (void)NewNode;
@@ -129,19 +130,19 @@
   virtual bool isSimpleAssign() const { return false; }
 
   void livenessLightweight(Cfg *Func, LivenessBV &Live);
-  // Calculates liveness for this instruction.  Returns true if this
-  // instruction is (tentatively) still live and should be retained,
-  // and false if this instruction is (tentatively) dead and should be
-  // deleted.  The decision is tentative until the liveness dataflow
-  // algorithm has converged, and then a separate pass permanently
-  // deletes dead instructions.
+  /// Calculates liveness for this instruction.  Returns true if this
+  /// instruction is (tentatively) still live and should be retained,
+  /// and false if this instruction is (tentatively) dead and should be
+  /// deleted.  The decision is tentative until the liveness dataflow
+  /// algorithm has converged, and then a separate pass permanently
+  /// deletes dead instructions.
   bool liveness(InstNumberT InstNumber, LivenessBV &Live, Liveness *Liveness,
                 LiveBeginEndMap *LiveBegin, LiveBeginEndMap *LiveEnd);
 
-  // Get the number of native instructions that this instruction
-  // ultimately emits.  By default, high-level instructions don't
-  // result in any native instructions, and a target-specific
-  // instruction results in a single native instruction.
+  /// Get the number of native instructions that this instruction
+  /// ultimately emits.  By default, high-level instructions don't
+  /// result in any native instructions, and a target-specific
+  /// instruction results in a single native instruction.
   virtual uint32_t getEmitInstCount() const { return 0; }
   // TODO(stichnot): Change Inst back to abstract once the g++ build
   // issue is fixed.  llvm::ilist<Ice::Inst> doesn't work under g++
@@ -179,26 +180,26 @@
       LiveRangesEnded |= (((LREndedBits)1u) << VarIndex);
   }
   void resetLastUses() { LiveRangesEnded = 0; }
-  // The destroy() method lets the instruction cleanly release any
-  // memory that was allocated via the Cfg's allocator.
+  /// The destroy() method lets the instruction cleanly release any
+  /// memory that was allocated via the Cfg's allocator.
   virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }
 
   const InstKind Kind;
-  // Number is the instruction number for describing live ranges.
+  /// Number is the instruction number for describing live ranges.
   InstNumberT Number;
-  // Deleted means irrevocably deleted.
+  /// Deleted means irrevocably deleted.
   bool Deleted = false;
-  // Dead means one of two things depending on context: (1) pending
-  // deletion after liveness analysis converges, or (2) marked for
-  // deletion during lowering due to a folded bool operation.
+  /// Dead means one of two things depending on context: (1) pending
+  /// deletion after liveness analysis converges, or (2) marked for
+  /// deletion during lowering due to a folded bool operation.
   bool Dead = false;
-  // HasSideEffects means the instruction is something like a function
-  // call or a volatile load that can't be removed even if its Dest
-  // variable is not live.
+  /// HasSideEffects means the instruction is something like a function
+  /// call or a volatile load that can't be removed even if its Dest
+  /// variable is not live.
   bool HasSideEffects = false;
-  // IsDestNonKillable means that liveness analysis shouldn't consider
-  // this instruction to kill the Dest variable.  This is used when
-  // lowering produces two assignments to the same variable.
+  /// IsDestNonKillable means that liveness analysis shouldn't consider
+  /// this instruction to kill the Dest variable.  This is used when
+  /// lowering produces two assignments to the same variable.
   bool IsDestNonKillable = false;
 
   Variable *Dest;
@@ -206,15 +207,15 @@
   SizeT NumSrcs = 0;
   Operand **Srcs;
 
-  // LiveRangesEnded marks which Variables' live ranges end in this
-  // instruction.  An instruction can have an arbitrary number of
-  // source operands (e.g. a call instruction), and each source
-  // operand can contain 0 or 1 Variable (and target-specific operands
-  // could contain more than 1 Variable).  All the variables in an
-  // instruction are conceptually flattened and each variable is
-  // mapped to one bit position of the LiveRangesEnded bit vector.
-  // Only the first CHAR_BIT * sizeof(LREndedBits) variables are
-  // tracked this way.
+  /// LiveRangesEnded marks which Variables' live ranges end in this
+  /// instruction.  An instruction can have an arbitrary number of
+  /// source operands (e.g. a call instruction), and each source
+  /// operand can contain 0 or 1 Variable (and target-specific operands
+  /// could contain more than 1 Variable).  All the variables in an
+  /// instruction are conceptually flattened and each variable is
+  /// mapped to one bit position of the LiveRangesEnded bit vector.
+  /// Only the first CHAR_BIT * sizeof(LREndedBits) variables are
+  /// tracked this way.
   typedef uint32_t LREndedBits; // only first 32 src operands tracked, sorry
   LREndedBits LiveRangesEnded;
 };
@@ -235,9 +236,9 @@
   }
 };
 
-// Alloca instruction.  This captures the size in bytes as getSrc(0),
-// and the required alignment in bytes.  The alignment must be either
-// 0 (no alignment required) or a power of 2.
+/// Alloca instruction.  This captures the size in bytes as getSrc(0),
+/// and the required alignment in bytes.  The alignment must be either
+/// 0 (no alignment required) or a power of 2.
 class InstAlloca : public InstHighLevel {
   InstAlloca() = delete;
   InstAlloca(const InstAlloca &) = delete;
@@ -261,8 +262,8 @@
   const uint32_t AlignInBytes;
 };
 
-// Binary arithmetic instruction.  The source operands are captured in
-// getSrc(0) and getSrc(1).
+/// Binary arithmetic instruction.  The source operands are captured in
+/// getSrc(0) and getSrc(1).
 class InstArithmetic : public InstHighLevel {
   InstArithmetic() = delete;
   InstArithmetic(const InstArithmetic &) = delete;
@@ -296,12 +297,12 @@
   const OpKind Op;
 };
 
-// Assignment instruction.  The source operand is captured in
-// getSrc(0).  This is not part of the LLVM bitcode, but is a useful
-// abstraction for some of the lowering.  E.g., if Phi instruction
-// lowering happens before target lowering, or for representing an
-// Inttoptr instruction, or as an intermediate step for lowering a
-// Load instruction.
+/// Assignment instruction.  The source operand is captured in
+/// getSrc(0).  This is not part of the LLVM bitcode, but is a useful
+/// abstraction for some of the lowering.  E.g., if Phi instruction
+/// lowering happens before target lowering, or for representing an
+/// Inttoptr instruction, or as an intermediate step for lowering a
+/// Load instruction.
 class InstAssign : public InstHighLevel {
   InstAssign() = delete;
   InstAssign(const InstAssign &) = delete;
@@ -319,22 +320,22 @@
   InstAssign(Cfg *Func, Variable *Dest, Operand *Source);
 };
 
-// Branch instruction.  This represents both conditional and
-// unconditional branches.
+/// Branch instruction.  This represents both conditional and
+/// unconditional branches.
 class InstBr : public InstHighLevel {
   InstBr() = delete;
   InstBr(const InstBr &) = delete;
   InstBr &operator=(const InstBr &) = delete;
 
 public:
-  // Create a conditional branch.  If TargetTrue==TargetFalse, it is
-  // optimized to an unconditional branch.
+  /// Create a conditional branch.  If TargetTrue==TargetFalse, it is
+  /// optimized to an unconditional branch.
   static InstBr *create(Cfg *Func, Operand *Source, CfgNode *TargetTrue,
                         CfgNode *TargetFalse) {
     return new (Func->allocate<InstBr>())
         InstBr(Func, Source, TargetTrue, TargetFalse);
   }
-  // Create an unconditional branch.
+  /// Create an unconditional branch.
   static InstBr *create(Cfg *Func, CfgNode *Target) {
     return new (Func->allocate<InstBr>()) InstBr(Func, Target);
   }
@@ -356,17 +357,17 @@
   static bool classof(const Inst *Inst) { return Inst->getKind() == Br; }
 
 private:
-  // Conditional branch
+  /// Conditional branch
   InstBr(Cfg *Func, Operand *Source, CfgNode *TargetTrue, CfgNode *TargetFalse);
-  // Unconditional branch
+  /// Unconditional branch
   InstBr(Cfg *Func, CfgNode *Target);
 
-  CfgNode *TargetFalse; // Doubles as unconditional branch target
-  CfgNode *TargetTrue;  // nullptr if unconditional branch
+  CfgNode *TargetFalse; ///< Doubles as unconditional branch target
+  CfgNode *TargetTrue;  ///< nullptr if unconditional branch
 };
 
-// Call instruction.  The call target is captured as getSrc(0), and
-// arg I is captured as getSrc(I+1).
+/// Call instruction.  The call target is captured as getSrc(0), and
+/// arg I is captured as getSrc(I+1).
 class InstCall : public InstHighLevel {
   InstCall() = delete;
   InstCall(const InstCall &) = delete;
@@ -375,9 +376,9 @@
 public:
   static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
                           Operand *CallTarget, bool HasTailCall) {
-    // Set HasSideEffects to true so that the call instruction can't be
-    // dead-code eliminated. IntrinsicCalls can override this if the
-    // particular intrinsic is deletable and has no side-effects.
+    /// Set HasSideEffects to true so that the call instruction can't be
+    /// dead-code eliminated. IntrinsicCalls can override this if the
+    /// particular intrinsic is deletable and has no side-effects.
     const bool HasSideEffects = true;
     const InstKind Kind = Inst::Call;
     return new (Func->allocate<InstCall>()) InstCall(
@@ -404,7 +405,7 @@
   bool HasTailCall;
 };
 
-// Cast instruction (a.k.a. conversion operation).
+/// Cast instruction (a.k.a. conversion operation).
 class InstCast : public InstHighLevel {
   InstCast() = delete;
   InstCast(const InstCast &) = delete;
@@ -435,7 +436,7 @@
   const OpKind CastKind;
 };
 
-// ExtractElement instruction.
+/// ExtractElement instruction.
 class InstExtractElement : public InstHighLevel {
   InstExtractElement() = delete;
   InstExtractElement(const InstExtractElement &) = delete;
@@ -458,8 +459,8 @@
                      Operand *Source2);
 };
 
-// Floating-point comparison instruction.  The source operands are
-// captured in getSrc(0) and getSrc(1).
+/// Floating-point comparison instruction.  The source operands are
+/// captured in getSrc(0) and getSrc(1).
 class InstFcmp : public InstHighLevel {
   InstFcmp() = delete;
   InstFcmp(const InstFcmp &) = delete;
@@ -489,8 +490,8 @@
   const FCond Condition;
 };
 
-// Integer comparison instruction.  The source operands are captured
-// in getSrc(0) and getSrc(1).
+/// Integer comparison instruction.  The source operands are captured
+/// in getSrc(0) and getSrc(1).
 class InstIcmp : public InstHighLevel {
   InstIcmp() = delete;
   InstIcmp(const InstIcmp &) = delete;
@@ -520,7 +521,7 @@
   const ICond Condition;
 };
 
-// InsertElement instruction.
+/// InsertElement instruction.
 class InstInsertElement : public InstHighLevel {
   InstInsertElement() = delete;
   InstInsertElement(const InstInsertElement &) = delete;
@@ -543,8 +544,8 @@
                     Operand *Source2, Operand *Source3);
 };
 
-// Call to an intrinsic function.  The call target is captured as getSrc(0),
-// and arg I is captured as getSrc(I+1).
+/// Call to an intrinsic function.  The call target is captured as getSrc(0),
+/// and arg I is captured as getSrc(I+1).
 class InstIntrinsicCall : public InstCall {
   InstIntrinsicCall() = delete;
   InstIntrinsicCall(const InstIntrinsicCall &) = delete;
@@ -573,7 +574,7 @@
   const Intrinsics::IntrinsicInfo Info;
 };
 
-// Load instruction.  The source address is captured in getSrc(0).
+/// Load instruction.  The source address is captured in getSrc(0).
 class InstLoad : public InstHighLevel {
   InstLoad() = delete;
   InstLoad(const InstLoad &) = delete;
@@ -594,8 +595,8 @@
   InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr);
 };
 
-// Phi instruction.  For incoming edge I, the node is Labels[I] and
-// the Phi source operand is getSrc(I).
+/// Phi instruction.  For incoming edge I, the node is Labels[I] and
+/// the Phi source operand is getSrc(I).
 class InstPhi : public InstHighLevel {
   InstPhi() = delete;
   InstPhi(const InstPhi &) = delete;
@@ -621,15 +622,15 @@
     Inst::destroy(Func);
   }
 
-  // Labels[] duplicates the InEdges[] information in the enclosing
-  // CfgNode, but the Phi instruction is created before InEdges[]
-  // is available, so it's more complicated to share the list.
+  /// Labels[] duplicates the InEdges[] information in the enclosing
+  /// CfgNode, but the Phi instruction is created before InEdges[]
+  /// is available, so it's more complicated to share the list.
   CfgNode **Labels;
 };
 
-// Ret instruction.  The return value is captured in getSrc(0), but if
-// there is no return value (void-type function), then
-// getSrcSize()==0 and hasRetValue()==false.
+/// Ret instruction.  The return value is captured in getSrc(0), but if
+/// there is no return value (void-type function), then
+/// getSrcSize()==0 and hasRetValue()==false.
 class InstRet : public InstHighLevel {
   InstRet() = delete;
   InstRet(const InstRet &) = delete;
@@ -652,7 +653,7 @@
   InstRet(Cfg *Func, Operand *RetValue);
 };
 
-// Select instruction.  The condition, true, and false operands are captured.
+/// Select instruction.  The condition, true, and false operands are captured.
 class InstSelect : public InstHighLevel {
   InstSelect() = delete;
   InstSelect(const InstSelect &) = delete;
@@ -675,8 +676,8 @@
              Operand *Source2);
 };
 
-// Store instruction.  The address operand is captured, along with the
-// data operand to be stored into the address.
+/// Store instruction.  The address operand is captured, along with the
+/// data operand to be stored into the address.
 class InstStore : public InstHighLevel {
   InstStore() = delete;
   InstStore(const InstStore &) = delete;
@@ -700,8 +701,8 @@
   InstStore(Cfg *Func, Operand *Data, Operand *Addr);
 };
 
-// Switch instruction.  The single source operand is captured as
-// getSrc(0).
+/// Switch instruction.  The single source operand is captured as
+/// getSrc(0).
 class InstSwitch : public InstHighLevel {
   InstSwitch() = delete;
   InstSwitch(const InstSwitch &) = delete;
@@ -739,13 +740,13 @@
   }
 
   CfgNode *LabelDefault;
-  SizeT NumCases;   // not including the default case
-  uint64_t *Values; // size is NumCases
-  CfgNode **Labels; // size is NumCases
+  SizeT NumCases;   ///< not including the default case
+  uint64_t *Values; ///< size is NumCases
+  CfgNode **Labels; ///< size is NumCases
 };
 
-// Unreachable instruction.  This is a terminator instruction with no
-// operands.
+/// Unreachable instruction.  This is a terminator instruction with no
+/// operands.
 class InstUnreachable : public InstHighLevel {
   InstUnreachable() = delete;
   InstUnreachable(const InstUnreachable &) = delete;
@@ -765,8 +766,8 @@
   explicit InstUnreachable(Cfg *Func);
 };
 
-// BundleLock instruction.  There are no operands.  Contains an option
-// indicating whether align_to_end is specified.
+/// BundleLock instruction.  There are no operands.  Contains an option
+/// indicating whether align_to_end is specified.
 class InstBundleLock : public InstHighLevel {
   InstBundleLock() = delete;
   InstBundleLock(const InstBundleLock &) = delete;
@@ -791,7 +792,7 @@
   InstBundleLock(Cfg *Func, Option BundleOption);
 };
 
-// BundleUnlock instruction.  There are no operands.
+/// BundleUnlock instruction.  There are no operands.
 class InstBundleUnlock : public InstHighLevel {
   InstBundleUnlock() = delete;
   InstBundleUnlock(const InstBundleUnlock &) = delete;
@@ -812,18 +813,18 @@
   explicit InstBundleUnlock(Cfg *Func);
 };
 
-// FakeDef instruction.  This creates a fake definition of a variable,
-// which is how we represent the case when an instruction produces
-// multiple results.  This doesn't happen with high-level ICE
-// instructions, but might with lowered instructions.  For example,
-// this would be a way to represent condition flags being modified by
-// an instruction.
-//
-// It's generally useful to set the optional source operand to be the
-// dest variable of the instruction that actually produces the FakeDef
-// dest.  Otherwise, the original instruction could be dead-code
-// eliminated if its dest operand is unused, and therefore the FakeDef
-// dest wouldn't be properly initialized.
+/// FakeDef instruction.  This creates a fake definition of a variable,
+/// which is how we represent the case when an instruction produces
+/// multiple results.  This doesn't happen with high-level ICE
+/// instructions, but might with lowered instructions.  For example,
+/// this would be a way to represent condition flags being modified by
+/// an instruction.
+///
+/// It's generally useful to set the optional source operand to be the
+/// dest variable of the instruction that actually produces the FakeDef
+/// dest.  Otherwise, the original instruction could be dead-code
+/// eliminated if its dest operand is unused, and therefore the FakeDef
+/// dest wouldn't be properly initialized.
 class InstFakeDef : public InstHighLevel {
   InstFakeDef() = delete;
   InstFakeDef(const InstFakeDef &) = delete;
@@ -843,11 +844,11 @@
   InstFakeDef(Cfg *Func, Variable *Dest, Variable *Src);
 };
 
-// FakeUse instruction.  This creates a fake use of a variable, to
-// keep the instruction that produces that variable from being
-// dead-code eliminated.  This is useful in a variety of lowering
-// situations.  The FakeUse instruction has no dest, so it can itself
-// never be dead-code eliminated.
+/// FakeUse instruction.  This creates a fake use of a variable, to
+/// keep the instruction that produces that variable from being
+/// dead-code eliminated.  This is useful in a variety of lowering
+/// situations.  The FakeUse instruction has no dest, so it can itself
+/// never be dead-code eliminated.
 class InstFakeUse : public InstHighLevel {
   InstFakeUse() = delete;
   InstFakeUse(const InstFakeUse &) = delete;
@@ -866,16 +867,16 @@
   InstFakeUse(Cfg *Func, Variable *Src);
 };
 
-// FakeKill instruction.  This "kills" a set of variables by modeling
-// a trivial live range at this instruction for each (implicit)
-// variable.  The primary use is to indicate that scratch registers
-// are killed after a call, so that the register allocator won't
-// assign a scratch register to a variable whose live range spans a
-// call.
-//
-// The FakeKill instruction also holds a pointer to the instruction
-// that kills the set of variables, so that if that linked instruction
-// gets dead-code eliminated, the FakeKill instruction will as well.
+/// FakeKill instruction.  This "kills" a set of variables by modeling
+/// a trivial live range at this instruction for each (implicit)
+/// variable.  The primary use is to indicate that scratch registers
+/// are killed after a call, so that the register allocator won't
+/// assign a scratch register to a variable whose live range spans a
+/// call.
+///
+/// The FakeKill instruction also holds a pointer to the instruction
+/// that kills the set of variables, so that if that linked instruction
+/// gets dead-code eliminated, the FakeKill instruction will as well.
 class InstFakeKill : public InstHighLevel {
   InstFakeKill() = delete;
   InstFakeKill(const InstFakeKill &) = delete;
@@ -894,12 +895,12 @@
 private:
   InstFakeKill(Cfg *Func, const Inst *Linked);
 
-  // This instruction is ignored if Linked->isDeleted() is true.
+  /// This instruction is ignored if Linked->isDeleted() is true.
   const Inst *Linked;
 };
 
-// The Target instruction is the base class for all target-specific
-// instructions.
+/// The Target instruction is the base class for all target-specific
+/// instructions.
 class InstTarget : public Inst {
   InstTarget() = delete;
   InstTarget(const InstTarget &) = delete;
@@ -923,8 +924,8 @@
 
 namespace llvm {
 
-// Override the default ilist traits so that Inst's private ctor and
-// deleted dtor aren't invoked.
+/// Override the default ilist traits so that Inst's private ctor and
+/// deleted dtor aren't invoked.
 template <>
 struct ilist_traits<Ice::Inst> : public ilist_default_traits<Ice::Inst> {
   Ice::Inst *createSentinel() const {
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 0476934..72178ac 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the InstARM32 and OperandARM32 classes,
-// primarily the constructors and the dump()/emit() methods.
-//
+///
+/// \file
+/// This file implements the InstARM32 and OperandARM32 classes,
+/// primarily the constructors and the dump()/emit() methods.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceInstARM32.h"
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index caef19f..d12c4ff 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the InstARM32 and OperandARM32 classes and
-// their subclasses.  This represents the machine instructions and
-// operands used for ARM32 code selection.
-//
+///
+/// \file
+/// This file declares the InstARM32 and OperandARM32 classes and
+/// their subclasses.  This represents the machine instructions and
+/// operands used for ARM32 code selection.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINSTARM32_H
@@ -26,8 +27,8 @@
 
 class TargetARM32;
 
-// OperandARM32 extends the Operand hierarchy.  Its subclasses are
-// OperandARM32Mem and OperandARM32Flex.
+/// OperandARM32 extends the Operand hierarchy.  Its subclasses are
+/// OperandARM32Mem and OperandARM32Flex.
 class OperandARM32 : public Operand {
   OperandARM32() = delete;
   OperandARM32(const OperandARM32 &) = delete;
@@ -61,16 +62,16 @@
       : Operand(static_cast<OperandKind>(Kind), Ty) {}
 };
 
-// OperandARM32Mem represents a memory operand in any of the various ARM32
-// addressing modes.
+/// OperandARM32Mem represents a memory operand in any of the various ARM32
+/// addressing modes.
 class OperandARM32Mem : public OperandARM32 {
   OperandARM32Mem() = delete;
   OperandARM32Mem(const OperandARM32Mem &) = delete;
   OperandARM32Mem &operator=(const OperandARM32Mem &) = delete;
 
 public:
-  // Memory operand addressing mode.
-  // The enum value also carries the encoding.
+  /// Memory operand addressing mode.
+  /// The enum value also carries the encoding.
   // TODO(jvoung): unify with the assembler.
   enum AddrMode {
     // bit encoding P U W
@@ -82,21 +83,21 @@
     NegPostIndex = (0 | 0 | 0) << 21 // negative post-indexed with writeback
   };
 
-  // Provide two constructors.
-  // NOTE: The Variable-typed operands have to be registers.
-  //
-  // (1) Reg + Imm. The Immediate actually has a limited number of bits
-  // for encoding, so check canHoldOffset first. It cannot handle
-  // general Constant operands like ConstantRelocatable, since a relocatable
-  // can potentially take up too many bits.
+  /// Provide two constructors.
+  /// NOTE: The Variable-typed operands have to be registers.
+  ///
+  /// (1) Reg + Imm. The Immediate actually has a limited number of bits
+  /// for encoding, so check canHoldOffset first. It cannot handle
+  /// general Constant operands like ConstantRelocatable, since a relocatable
+  /// can potentially take up too many bits.
   static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
                                  ConstantInteger32 *ImmOffset,
                                  AddrMode Mode = Offset) {
     return new (Func->allocate<OperandARM32Mem>())
         OperandARM32Mem(Func, Ty, Base, ImmOffset, Mode);
   }
-  // (2) Reg +/- Reg with an optional shift of some kind and amount.
-  // Note that this mode is disallowed in the NaCl sandbox.
+  /// (2) Reg +/- Reg with an optional shift of some kind and amount.
+  /// Note that this mode is disallowed in the NaCl sandbox.
   static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
                                  Variable *Index, ShiftKind ShiftOp = kNoShift,
                                  uint16_t ShiftAmt = 0,
@@ -129,10 +130,10 @@
     return Operand->getKind() == static_cast<OperandKind>(kMem);
   }
 
-  // Return true if a load/store instruction for an element of type Ty
-  // can encode the Offset directly in the immediate field of the 32-bit
-  // ARM instruction. For some types, if the load is Sign extending, then
-  // the range is reduced.
+  /// Return true if a load/store instruction for an element of type Ty
+  /// can encode the Offset directly in the immediate field of the 32-bit
+  /// ARM instruction. For some types, if the load is Sign extending, then
+  /// the range is reduced.
   static bool canHoldOffset(Type Ty, bool SignExt, int32_t Offset);
 
 private:
@@ -149,10 +150,10 @@
   AddrMode Mode;
 };
 
-// OperandARM32Flex represent the "flexible second operand" for
-// data-processing instructions. It can be a rotatable 8-bit constant, or
-// a register with an optional shift operand. The shift amount can even be
-// a third register.
+/// OperandARM32Flex represents the "flexible second operand" for
+/// data-processing instructions. It can be a rotatable 8-bit constant, or
+/// a register with an optional shift operand. The shift amount can even be
+/// a third register.
 class OperandARM32Flex : public OperandARM32 {
   OperandARM32Flex() = delete;
   OperandARM32Flex(const OperandARM32Flex &) = delete;
@@ -168,14 +169,14 @@
   OperandARM32Flex(OperandKindARM32 Kind, Type Ty) : OperandARM32(Kind, Ty) {}
 };
 
-// Rotated immediate variant.
+/// Rotated immediate variant.
 class OperandARM32FlexImm : public OperandARM32Flex {
   OperandARM32FlexImm() = delete;
   OperandARM32FlexImm(const OperandARM32FlexImm &) = delete;
   OperandARM32FlexImm &operator=(const OperandARM32FlexImm &) = delete;
 
 public:
-  // Immed_8 rotated by an even number of bits (2 * RotateAmt).
+  /// Immed_8 rotated by an even number of bits (2 * RotateAmt).
   static OperandARM32FlexImm *create(Cfg *Func, Type Ty, uint32_t Imm,
                                      uint32_t RotateAmt) {
     return new (Func->allocate<OperandARM32FlexImm>())
@@ -190,8 +191,8 @@
     return Operand->getKind() == static_cast<OperandKind>(kFlexImm);
   }
 
-  // Return true if the Immediate can fit in the ARM flexible operand.
-  // Fills in the out-params RotateAmt and Immed_8 if Immediate fits.
+  /// Return true if the Immediate can fit in the ARM flexible operand.
+  /// Fills in the out-params RotateAmt and Immed_8 if Immediate fits.
   static bool canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
                          uint32_t *Immed_8);
 
@@ -205,14 +206,14 @@
   uint32_t RotateAmt;
 };
 
-// Shifted register variant.
+/// Shifted register variant.
 class OperandARM32FlexReg : public OperandARM32Flex {
   OperandARM32FlexReg() = delete;
   OperandARM32FlexReg(const OperandARM32FlexReg &) = delete;
   OperandARM32FlexReg &operator=(const OperandARM32FlexReg &) = delete;
 
 public:
-  // Register with immediate/reg shift amount and shift operation.
+  /// Register with immediate/reg shift amount and shift operation.
   static OperandARM32FlexReg *create(Cfg *Func, Type Ty, Variable *Reg,
                                      ShiftKind ShiftOp, Operand *ShiftAmt) {
     return new (Func->allocate<OperandARM32FlexReg>())
@@ -229,7 +230,7 @@
 
   Variable *getReg() const { return Reg; }
   ShiftKind getShiftOp() const { return ShiftOp; }
-  // ShiftAmt can represent an immediate or a register.
+  /// ShiftAmt can represent an immediate or a register.
   Operand *getShiftAmt() const { return ShiftAmt; }
 
 private:
@@ -241,9 +242,9 @@
   Operand *ShiftAmt;
 };
 
-// Base class for ARM instructions. While most ARM instructions can be
-// conditionally executed, a few of them are not predicable (halt,
-// memory barriers, etc.).
+/// Base class for ARM instructions. While most ARM instructions can be
+/// conditionally executed, a few of them are not predicable (halt,
+/// memory barriers, etc.).
 class InstARM32 : public InstTarget {
   InstARM32() = delete;
   InstARM32(const InstARM32 &) = delete;
@@ -304,7 +305,7 @@
   }
 };
 
-// A predicable ARM instruction.
+/// A predicable ARM instruction.
 class InstARM32Pred : public InstARM32 {
   InstARM32Pred() = delete;
   InstARM32Pred(const InstARM32Pred &) = delete;
@@ -321,7 +322,7 @@
   static const char *predString(CondARM32::Cond Predicate);
   void dumpOpcodePred(Ostream &Str, const char *Opcode, Type Ty) const;
 
-  // Shared emit routines for common forms of instructions.
+  /// Shared emit routines for common forms of instructions.
   static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst,
                              const Cfg *Func);
   static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
@@ -343,7 +344,7 @@
   return Stream;
 }
 
-// Instructions of the form x := op(y).
+/// Instructions of the form x := op(y).
 template <InstARM32::InstKindARM32 K>
 class InstARM32UnaryopGPR : public InstARM32Pred {
   InstARM32UnaryopGPR() = delete;
@@ -387,7 +388,7 @@
   static const char *Opcode;
 };
 
-// Instructions of the form x := x op y.
+/// Instructions of the form x := x op y.
 template <InstARM32::InstKindARM32 K>
 class InstARM32TwoAddrGPR : public InstARM32Pred {
   InstARM32TwoAddrGPR() = delete;
@@ -395,7 +396,7 @@
   InstARM32TwoAddrGPR &operator=(const InstARM32TwoAddrGPR &) = delete;
 
 public:
-  // Dest must be a register.
+  /// Dest must be a register.
   static InstARM32TwoAddrGPR *create(Cfg *Func, Variable *Dest, Operand *Src,
                                      CondARM32::Cond Predicate) {
     return new (Func->allocate<InstARM32TwoAddrGPR>())
@@ -433,8 +434,8 @@
   static const char *Opcode;
 };
 
-// Base class for assignment instructions.
-// These can be tested for redundancy (and elided if redundant).
+/// Base class for assignment instructions.
+/// These can be tested for redundancy (and elided if redundant).
 template <InstARM32::InstKindARM32 K>
 class InstARM32Movlike : public InstARM32Pred {
   InstARM32Movlike() = delete;
@@ -475,8 +476,8 @@
   static const char *Opcode;
 };
 
-// Instructions of the form x := y op z. May have the side-effect of setting
-// status flags.
+/// Instructions of the form x := y op z. May have the side-effect of setting
+/// status flags.
 template <InstARM32::InstKindARM32 K>
 class InstARM32ThreeAddrGPR : public InstARM32Pred {
   InstARM32ThreeAddrGPR() = delete;
@@ -484,8 +485,8 @@
   InstARM32ThreeAddrGPR &operator=(const InstARM32ThreeAddrGPR &) = delete;
 
 public:
-  // Create an ordinary binary-op instruction like add, and sub.
-  // Dest and Src1 must be registers.
+  /// Create an ordinary binary-op instruction like add and sub.
+  /// Dest and Src0 must be registers.
   static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest,
                                        Variable *Src0, Operand *Src1,
                                        CondARM32::Cond Predicate,
@@ -633,12 +634,12 @@
 typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv;
 typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
 typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv;
-// Move instruction (variable <- flex). This is more of a pseudo-inst.
-// If var is a register, then we use "mov". If var is stack, then we use
-// "str" to store to the stack.
+/// Move instruction (variable <- flex). This is more of a pseudo-inst.
+/// If var is a register, then we use "mov". If var is stack, then we use
+/// "str" to store to the stack.
 typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov;
-// MovT leaves the bottom bits alone so dest is also a source.
-// This helps indicate that a previous MovW setting dest is not dead code.
+/// MovT leaves the bottom bits alone so dest is also a source.
+/// This helps indicate that a previous MovW setting dest is not dead code.
 typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt;
 typedef InstARM32UnaryopGPR<InstARM32::Movw> InstARM32Movw;
 typedef InstARM32UnaryopGPR<InstARM32::Mvn> InstARM32Mvn;
@@ -677,14 +678,14 @@
   SizeT Number; // used for unique label generation.
 };
 
-// Direct branch instruction.
+/// Direct branch instruction.
 class InstARM32Br : public InstARM32Pred {
   InstARM32Br() = delete;
   InstARM32Br(const InstARM32Br &) = delete;
   InstARM32Br &operator=(const InstARM32Br &) = delete;
 
 public:
-  // Create a conditional branch to one of two nodes.
+  /// Create a conditional branch to one of two nodes.
   static InstARM32Br *create(Cfg *Func, CfgNode *TargetTrue,
                              CfgNode *TargetFalse, CondARM32::Cond Predicate) {
     assert(Predicate != CondARM32::AL);
@@ -692,16 +693,16 @@
     return new (Func->allocate<InstARM32Br>())
         InstARM32Br(Func, TargetTrue, TargetFalse, NoLabel, Predicate);
   }
-  // Create an unconditional branch to a node.
+  /// Create an unconditional branch to a node.
   static InstARM32Br *create(Cfg *Func, CfgNode *Target) {
     constexpr CfgNode *NoCondTarget = nullptr;
     constexpr InstARM32Label *NoLabel = nullptr;
     return new (Func->allocate<InstARM32Br>())
         InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL);
   }
-  // Create a non-terminator conditional branch to a node, with a
-  // fallthrough to the next instruction in the current node.  This is
-  // used for switch lowering.
+  /// Create a non-terminator conditional branch to a node, with a
+  /// fallthrough to the next instruction in the current node.  This is
+  /// used for switch lowering.
   static InstARM32Br *create(Cfg *Func, CfgNode *Target,
                              CondARM32::Cond Predicate) {
     assert(Predicate != CondARM32::AL);
@@ -750,18 +751,18 @@
   const InstARM32Label *Label; // Intra-block branch target
 };
 
-// AdjustStack instruction - subtracts SP by the given amount and
-// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts the given amount from SP and
+/// updates the stack offset during code emission.
 class InstARM32AdjustStack : public InstARM32 {
   InstARM32AdjustStack() = delete;
   InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
   InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
 
 public:
-  // Note: We need both Amount and SrcAmount. If Amount is too large then
-  // it needs to be copied to a register (so SrcAmount could be a register).
-  // However, we also need the numeric Amount for bookkeeping, and it's
-  // hard to pull that from the generic SrcAmount operand.
+  /// Note: We need both Amount and SrcAmount. If Amount is too large then
+  /// it needs to be copied to a register (so SrcAmount could be a register).
+  /// However, we also need the numeric Amount for bookkeeping, and it's
+  /// hard to pull that from the generic SrcAmount operand.
   static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
                                       Operand *SrcAmount) {
     return new (Func->allocate<InstARM32AdjustStack>())
@@ -778,9 +779,9 @@
   const SizeT Amount;
 };
 
-// Call instruction (bl/blx).  Arguments should have already been pushed.
-// Technically bl and the register form of blx can be predicated, but we'll
-// leave that out until needed.
+/// Call instruction (bl/blx).  Arguments should have already been pushed.
+/// Technically bl and the register form of blx can be predicated, but we'll
+/// leave that out until needed.
 class InstARM32Call : public InstARM32 {
   InstARM32Call() = delete;
   InstARM32Call(const InstARM32Call &) = delete;
@@ -801,14 +802,14 @@
   InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
 };
 
-// Load instruction.
+/// Load instruction.
 class InstARM32Ldr : public InstARM32Pred {
   InstARM32Ldr() = delete;
   InstARM32Ldr(const InstARM32Ldr &) = delete;
   InstARM32Ldr &operator=(const InstARM32Ldr &) = delete;
 
 public:
-  // Dest must be a register.
+  /// Dest must be a register.
   static InstARM32Ldr *create(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
                               CondARM32::Cond Predicate) {
     return new (Func->allocate<InstARM32Ldr>())
@@ -824,8 +825,8 @@
                CondARM32::Cond Predicate);
 };
 
-// Pop into a list of GPRs. Technically this can be predicated, but we don't
-// need that functionality.
+/// Pop into a list of GPRs. Technically this can be predicated, but we don't
+/// need that functionality.
 class InstARM32Pop : public InstARM32 {
   InstARM32Pop() = delete;
   InstARM32Pop(const InstARM32Pop &) = delete;
@@ -846,8 +847,8 @@
   VarList Dests;
 };
 
-// Push a list of GPRs. Technically this can be predicated, but we don't
-// need that functionality.
+/// Push a list of GPRs. Technically this can be predicated, but we don't
+/// need that functionality.
 class InstARM32Push : public InstARM32 {
   InstARM32Push() = delete;
   InstARM32Push(const InstARM32Push &) = delete;
@@ -866,15 +867,15 @@
   InstARM32Push(Cfg *Func, const VarList &Srcs);
 };
 
-// Ret pseudo-instruction.  This is actually a "bx" instruction with
-// an "lr" register operand, but epilogue lowering will search for a Ret
-// instead of a generic "bx". This instruction also takes a Source
-// operand (for non-void returning functions) for liveness analysis, though
-// a FakeUse before the ret would do just as well.
-//
-// NOTE: Even though "bx" can be predicated, for now leave out the predication
-// since it's not yet known to be useful for Ret. That may complicate finding
-// the terminator instruction if it's not guaranteed to be executed.
+/// Ret pseudo-instruction.  This is actually a "bx" instruction with
+/// an "lr" register operand, but epilogue lowering will search for a Ret
+/// instead of a generic "bx". This instruction also takes a Source
+/// operand (for non-void returning functions) for liveness analysis, though
+/// a FakeUse before the ret would do just as well.
+///
+/// NOTE: Even though "bx" can be predicated, for now leave out the predication
+/// since it's not yet known to be useful for Ret. That may complicate finding
+/// the terminator instruction if it's not guaranteed to be executed.
 class InstARM32Ret : public InstARM32 {
   InstARM32Ret() = delete;
   InstARM32Ret(const InstARM32Ret &) = delete;
@@ -894,15 +895,15 @@
   InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source);
 };
 
-// Store instruction. It's important for liveness that there is no Dest
-// operand (OperandARM32Mem instead of Dest Variable).
+/// Store instruction. It's important for liveness that there is no Dest
+/// operand (OperandARM32Mem instead of Dest Variable).
 class InstARM32Str : public InstARM32Pred {
   InstARM32Str() = delete;
   InstARM32Str(const InstARM32Str &) = delete;
   InstARM32Str &operator=(const InstARM32Str &) = delete;
 
 public:
-  // Value must be a register.
+  /// Value must be a register.
   static InstARM32Str *create(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
                               CondARM32::Cond Predicate) {
     return new (Func->allocate<InstARM32Str>())
@@ -936,14 +937,14 @@
   explicit InstARM32Trap(Cfg *Func);
 };
 
-// Unsigned Multiply Long: d.lo, d.hi := x * y
+/// Unsigned Multiply Long: d.lo, d.hi := x * y
 class InstARM32Umull : public InstARM32Pred {
   InstARM32Umull() = delete;
   InstARM32Umull(const InstARM32Umull &) = delete;
   InstARM32Umull &operator=(const InstARM32Umull &) = delete;
 
 public:
-  // Everything must be a register.
+  /// Everything must be a register.
   static InstARM32Umull *create(Cfg *Func, Variable *DestLo, Variable *DestHi,
                                 Variable *Src0, Variable *Src1,
                                 CondARM32::Cond Predicate) {
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index 0c7c2d8..17c2583 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the InstMIPS32 and OperandMIPS32 classes and
-// their subclasses.  This represents the machine instructions and
-// operands used for MIPS32 code selection.
-//
+///
+/// \file
+/// This file declares the InstMIPS32 and OperandMIPS32 classes and
+/// their subclasses.  This represents the machine instructions and
+/// operands used for MIPS32 code selection.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINSTMIPS32_H
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 2866430..fd33c55 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the InstX8632 and OperandX8632 classes,
-// primarily the constructors and the dump()/emit() methods.
-//
+///
+/// \file
+/// This file implements the InstX8632 and OperandX8632 classes,
+/// primarily the constructors and the dump()/emit() methods.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceInstX8632.h"
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 0145293..03a5205 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the InstX8632 and OperandX8632 classes and
-// their subclasses.  This represents the machine instructions and
-// operands used for x86-32 code selection.
-//
+///
+/// \file
+/// This file declares the InstX8632 and OperandX8632 classes and
+/// their subclasses.  This represents the machine instructions and
+/// operands used for x86-32 code selection.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINSTX8632_H
@@ -28,8 +29,8 @@
 
 class TargetX8632;
 
-// OperandX8632 extends the Operand hierarchy.  Its subclasses are
-// OperandX8632Mem and VariableSplit.
+/// OperandX8632 extends the Operand hierarchy.  Its subclasses are
+/// OperandX8632Mem and VariableSplit.
 class OperandX8632 : public Operand {
   OperandX8632() = delete;
   OperandX8632(const OperandX8632 &) = delete;
@@ -48,9 +49,9 @@
       : Operand(static_cast<OperandKind>(Kind), Ty) {}
 };
 
-// OperandX8632Mem represents the m32 addressing mode, with optional
-// base and index registers, a constant offset, and a fixed shift
-// value for the index register.
+/// OperandX8632Mem represents the m32 addressing mode, with optional
+/// base and index registers, a constant offset, and a fixed shift
+/// value for the index register.
 class OperandX8632Mem : public OperandX8632 {
   OperandX8632Mem() = delete;
   OperandX8632Mem(const OperandX8632Mem &) = delete;
@@ -99,18 +100,18 @@
   Variable *Index;
   uint16_t Shift;
   SegmentRegisters SegmentReg : 16;
-  // A flag to show if this memory operand is a randomized one.
-  // Randomized memory operands are generated in
-  // TargetX8632::randomizeOrPoolImmediate()
+  /// A flag to show if this memory operand is a randomized one.
+  /// Randomized memory operands are generated in
+  /// TargetX8632::randomizeOrPoolImmediate().
   bool Randomized;
 };
 
-// VariableSplit is a way to treat an f64 memory location as a pair
-// of i32 locations (Low and High).  This is needed for some cases
-// of the Bitcast instruction.  Since it's not possible for integer
-// registers to access the XMM registers and vice versa, the
-// lowering forces the f64 to be spilled to the stack and then
-// accesses through the VariableSplit.
+/// VariableSplit is a way to treat an f64 memory location as a pair
+/// of i32 locations (Low and High).  This is needed for some cases
+/// of the Bitcast instruction.  Since it's not possible for integer
+/// registers to access the XMM registers and vice versa, the
+/// lowering forces the f64 to be spilled to the stack and then
+/// accesses through the VariableSplit.
 class VariableSplit : public OperandX8632 {
   VariableSplit() = delete;
   VariableSplit(const VariableSplit &) = delete;
@@ -145,11 +146,11 @@
   Portion Part;
 };
 
-// SpillVariable decorates a Variable by linking it to another
-// Variable.  When stack frame offsets are computed, the SpillVariable
-// is given a distinct stack slot only if its linked Variable has a
-// register.  If the linked Variable has a stack slot, then the
-// Variable and SpillVariable share that slot.
+/// SpillVariable decorates a Variable by linking it to another
+/// Variable.  When stack frame offsets are computed, the SpillVariable
+/// is given a distinct stack slot only if its linked Variable has a
+/// register.  If the linked Variable has a stack slot, then the
+/// Variable and SpillVariable share that slot.
 class SpillVariable : public Variable {
   SpillVariable() = delete;
   SpillVariable(const SpillVariable &) = delete;
@@ -284,9 +285,9 @@
   getOppositeCondition(X8632::Traits::Cond::BrCond Cond);
   void dump(const Cfg *Func) const override;
 
-  // Shared emit routines for common forms of instructions.
-  // See the definition of emitTwoAddress() for a description of
-  // ShiftHack.
+  /// Shared emit routines for common forms of instructions.
+  /// See the definition of emitTwoAddress() for a description of
+  /// ShiftHack.
   static void emitTwoAddress(const char *Opcode, const Inst *Inst,
                              const Cfg *Func, bool ShiftHack = false);
 
@@ -302,16 +303,16 @@
   static bool isClassof(const Inst *Inst, InstKindX8632 MyKind) {
     return Inst->getKind() == static_cast<InstKind>(MyKind);
   }
-  // Most instructions that operate on vector arguments require vector
-  // memory operands to be fully aligned (16-byte alignment for PNaCl
-  // vector types).  The stack frame layout and call ABI ensure proper
-  // alignment for stack operands, but memory operands (originating
-  // from load/store bitcode instructions) only have element-size
-  // alignment guarantees.  This function validates that none of the
-  // operands is a memory operand of vector type, calling
-  // report_fatal_error() if one is found.  This function should be
-  // called during emission, and maybe also in the ctor (as long as
-  // that fits the lowering style).
+  /// Most instructions that operate on vector arguments require vector
+  /// memory operands to be fully aligned (16-byte alignment for PNaCl
+  /// vector types).  The stack frame layout and call ABI ensure proper
+  /// alignment for stack operands, but memory operands (originating
+  /// from load/store bitcode instructions) only have element-size
+  /// alignment guarantees.  This function validates that none of the
+  /// operands is a memory operand of vector type, calling
+  /// report_fatal_error() if one is found.  This function should be
+  /// called during emission, and maybe also in the ctor (as long as
+  /// that fits the lowering style).
   void validateVectorAddrMode() const {
     if (getDest())
       validateVectorAddrModeOpnd(getDest());
@@ -328,12 +329,12 @@
   }
 };
 
-// InstX8632FakeRMW represents a non-atomic read-modify-write operation on a
-// memory location.  An InstX8632FakeRMW is a "fake" instruction in that it
-// still needs to be lowered to some actual RMW instruction.
-//
-// If A is some memory address, D is some data value to apply, and OP is an
-// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
+/// InstX8632FakeRMW represents a non-atomic read-modify-write operation on a
+/// memory location.  An InstX8632FakeRMW is a "fake" instruction in that it
+/// still needs to be lowered to some actual RMW instruction.
+///
+/// If A is some memory address, D is some data value to apply, and OP is an
+/// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
 class InstX8632FakeRMW : public InstX8632 {
   InstX8632FakeRMW() = delete;
   InstX8632FakeRMW(const InstX8632FakeRMW &) = delete;
@@ -361,44 +362,44 @@
                    InstArithmetic::OpKind Op, Variable *Beacon);
 };
 
-// InstX8632Label represents an intra-block label that is the target
-// of an intra-block branch.  The offset between the label and the
-// branch must be fit into one byte (considered "near").  These are
-// used for lowering i1 calculations, Select instructions, and 64-bit
-// compares on a 32-bit architecture, without basic block splitting.
-// Basic block splitting is not so desirable for several reasons, one
-// of which is the impact on decisions based on whether a variable's
-// live range spans multiple basic blocks.
-//
-// Intra-block control flow must be used with caution.  Consider the
-// sequence for "c = (a >= b ? x : y)".
-//     cmp a, b
-//     br lt, L1
-//     mov c, x
-//     jmp L2
-//   L1:
-//     mov c, y
-//   L2:
-//
-// Labels L1 and L2 are intra-block labels.  Without knowledge of the
-// intra-block control flow, liveness analysis will determine the "mov
-// c, x" instruction to be dead.  One way to prevent this is to insert
-// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
-// instructions, e.g.:
-//
-//     cmp a, b
-//     br lt, L1
-//     mov c, x
-//     jmp L2
-//     FakeUse(c)
-//   L1:
-//     mov c, y
-//   L2:
-//
-// The down-side is that "mov c, x" can never be dead-code eliminated
-// even if there are no uses of c.  As unlikely as this situation is,
-// it may be prevented by running dead code elimination before
-// lowering.
+/// InstX8632Label represents an intra-block label that is the target
+/// of an intra-block branch.  The offset between the label and the
+/// branch must fit into one byte (considered "near").  These are
+/// used for lowering i1 calculations, Select instructions, and 64-bit
+/// compares on a 32-bit architecture, without basic block splitting.
+/// Basic block splitting is not so desirable for several reasons, one
+/// of which is the impact on decisions based on whether a variable's
+/// live range spans multiple basic blocks.
+///
+/// Intra-block control flow must be used with caution.  Consider the
+/// sequence for "c = (a >= b ? x : y)".
+///     cmp a, b
+///     br lt, L1
+///     mov c, x
+///     jmp L2
+///   L1:
+///     mov c, y
+///   L2:
+///
+/// Labels L1 and L2 are intra-block labels.  Without knowledge of the
+/// intra-block control flow, liveness analysis will determine the "mov
+/// c, x" instruction to be dead.  One way to prevent this is to insert
+/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
+/// instructions, e.g.:
+///
+///     cmp a, b
+///     br lt, L1
+///     mov c, x
+///     jmp L2
+///     FakeUse(c)
+///   L1:
+///     mov c, y
+///   L2:
+///
+/// The down-side is that "mov c, x" can never be dead-code eliminated
+/// even if there are no uses of c.  As unlikely as this situation is,
+/// it may be prevented by running dead code elimination before
+/// lowering.
 class InstX8632Label : public InstX8632 {
   InstX8632Label() = delete;
   InstX8632Label(const InstX8632Label &) = delete;
@@ -418,17 +419,17 @@
 private:
   InstX8632Label(Cfg *Func, TargetX8632 *Target);
 
-  SizeT Number; // used for unique label generation.
+  SizeT Number; ///< used for unique label generation.
 };
 
-// Conditional and unconditional branch instruction.
+/// Conditional and unconditional branch instruction.
 class InstX8632Br : public InstX8632 {
   InstX8632Br() = delete;
   InstX8632Br(const InstX8632Br &) = delete;
   InstX8632Br &operator=(const InstX8632Br &) = delete;
 
 public:
-  // Create a conditional branch to a node.
+  /// Create a conditional branch to a node.
   static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
                              CfgNode *TargetFalse,
                              X8632::Traits::Cond::BrCond Condition) {
@@ -437,16 +438,16 @@
     return new (Func->allocate<InstX8632Br>())
         InstX8632Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
   }
-  // Create an unconditional branch to a node.
+  /// Create an unconditional branch to a node.
   static InstX8632Br *create(Cfg *Func, CfgNode *Target) {
     const CfgNode *NoCondTarget = nullptr;
     const InstX8632Label *NoLabel = nullptr;
     return new (Func->allocate<InstX8632Br>()) InstX8632Br(
         Func, NoCondTarget, Target, NoLabel, X8632::Traits::Cond::Br_None);
   }
-  // Create a non-terminator conditional branch to a node, with a
-  // fallthrough to the next instruction in the current node.  This is
-  // used for switch lowering.
+  /// Create a non-terminator conditional branch to a node, with a
+  /// fallthrough to the next instruction in the current node.  This is
+  /// used for switch lowering.
   static InstX8632Br *create(Cfg *Func, CfgNode *Target,
                              X8632::Traits::Cond::BrCond Condition) {
     assert(Condition != X8632::Traits::Cond::Br_None);
@@ -455,8 +456,8 @@
     return new (Func->allocate<InstX8632Br>())
         InstX8632Br(Func, Target, NoUncondTarget, NoLabel, Condition);
   }
-  // Create a conditional intra-block branch (or unconditional, if
-  // Condition==Br_None) to a label in the current block.
+  /// Create a conditional intra-block branch (or unconditional, if
+  /// Condition==Br_None) to a label in the current block.
   static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
                              X8632::Traits::Cond::BrCond Condition) {
     const CfgNode *NoCondTarget = nullptr;
@@ -494,12 +495,12 @@
   X8632::Traits::Cond::BrCond Condition;
   const CfgNode *TargetTrue;
   const CfgNode *TargetFalse;
-  const InstX8632Label *Label; // Intra-block branch target
+  const InstX8632Label *Label; ///< Intra-block branch target
 };
 
-// Jump to a target outside this function, such as tailcall, nacljump,
-// naclret, unreachable.  This is different from a Branch instruction
-// in that there is no intra-function control flow to represent.
+/// Jump to a target outside this function, such as tailcall, nacljump,
+/// naclret, unreachable.  This is different from a Branch instruction
+/// in that there is no intra-function control flow to represent.
 class InstX8632Jmp : public InstX8632 {
   InstX8632Jmp() = delete;
   InstX8632Jmp(const InstX8632Jmp &) = delete;
@@ -519,8 +520,8 @@
   InstX8632Jmp(Cfg *Func, Operand *Target);
 };
 
-// AdjustStack instruction - subtracts esp by the given amount and
-// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts esp by the given amount and
+/// updates the stack offset during code emission.
 class InstX8632AdjustStack : public InstX8632 {
   InstX8632AdjustStack() = delete;
   InstX8632AdjustStack(const InstX8632AdjustStack &) = delete;
@@ -541,7 +542,7 @@
   SizeT Amount;
 };
 
-// Call instruction.  Arguments should have already been pushed.
+/// Call instruction.  Arguments should have already been pushed.
 class InstX8632Call : public InstX8632 {
   InstX8632Call() = delete;
   InstX8632Call(const InstX8632Call &) = delete;
@@ -562,14 +563,14 @@
   InstX8632Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
 };
 
-// Emit a one-operand (GPR) instruction.
+/// Emit a one-operand (GPR) instruction.
 void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
                     const X8632::AssemblerX8632::GPREmitterOneOp &Emitter);
 void emitIASAsAddrOpTyGPR(
     const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
     const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter);
 
-// Instructions of the form x := op(x).
+/// Instructions of the form x := op(x).
 template <InstX8632::InstKindX8632 K>
 class InstX8632InplaceopGPR : public InstX8632 {
   InstX8632InplaceopGPR() = delete;
@@ -615,14 +616,14 @@
   static const X8632::AssemblerX8632::GPREmitterOneOp Emitter;
 };
 
-// Emit a two-operand (GPR) instruction, where the dest operand is a
-// Variable that's guaranteed to be a register.
+/// Emit a two-operand (GPR) instruction, where the dest operand is a
+/// Variable that's guaranteed to be a register.
 template <bool VarCanBeByte = true, bool SrcCanBeByte = true>
 void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst,
                        const Operand *Src,
                        const X8632::AssemblerX8632::GPREmitterRegOp &Emitter);
 
-// Instructions of the form x := op(y).
+/// Instructions of the form x := op(y).
 template <InstX8632::InstKindX8632 K>
 class InstX8632UnaryopGPR : public InstX8632 {
   InstX8632UnaryopGPR() = delete;
@@ -736,7 +737,7 @@
   InstX8632BinopGPRShift &operator=(const InstX8632BinopGPRShift &) = delete;
 
 public:
-  // Create a binary-op GPR shift instruction.
+  /// Create a binary-op GPR shift instruction.
   static InstX8632BinopGPRShift *create(Cfg *Func, Variable *Dest,
                                         Operand *Source) {
     return new (Func->allocate<InstX8632BinopGPRShift>())
@@ -781,7 +782,7 @@
   InstX8632BinopGPR &operator=(const InstX8632BinopGPR &) = delete;
 
 public:
-  // Create an ordinary binary-op instruction like add or sub.
+  /// Create an ordinary binary-op instruction like add or sub.
   static InstX8632BinopGPR *create(Cfg *Func, Variable *Dest, Operand *Source) {
     return new (Func->allocate<InstX8632BinopGPR>())
         InstX8632BinopGPR(Func, Dest, Source);
@@ -825,7 +826,7 @@
   InstX8632BinopRMW &operator=(const InstX8632BinopRMW &) = delete;
 
 public:
-  // Create an ordinary binary-op instruction like add or sub.
+  /// Create an ordinary binary-op instruction like add or sub.
   static InstX8632BinopRMW *create(Cfg *Func, OperandX8632Mem *DestSrc0,
                                    Operand *Src1) {
     return new (Func->allocate<InstX8632BinopRMW>())
@@ -868,7 +869,7 @@
   InstX8632BinopXmm &operator=(const InstX8632BinopXmm &) = delete;
 
 public:
-  // Create an XMM binary-op instruction like addss or addps.
+  /// Create an XMM binary-op instruction like addss or addps.
   static InstX8632BinopXmm *create(Cfg *Func, Variable *Dest, Operand *Source) {
     return new (Func->allocate<InstX8632BinopXmm>())
         InstX8632BinopXmm(Func, Dest, Source);
@@ -920,7 +921,7 @@
   InstX8632BinopXmmShift &operator=(const InstX8632BinopXmmShift &) = delete;
 
 public:
-  // Create an XMM binary-op shift operation.
+  /// Create an XMM binary-op shift operation.
   static InstX8632BinopXmmShift *create(Cfg *Func, Variable *Dest,
                                         Operand *Source) {
     return new (Func->allocate<InstX8632BinopXmmShift>())
@@ -968,7 +969,7 @@
   InstX8632Ternop &operator=(const InstX8632Ternop &) = delete;
 
 public:
-  // Create a ternary-op instruction like div or idiv.
+  /// Create a ternary-op instruction like div or idiv.
   static InstX8632Ternop *create(Cfg *Func, Variable *Dest, Operand *Source1,
                                  Operand *Source2) {
     return new (Func->allocate<InstX8632Ternop>())
@@ -1008,7 +1009,7 @@
   static const char *Opcode;
 };
 
-// Instructions of the form x := y op z
+/// Instructions of the form x := y op z
 template <InstX8632::InstKindX8632 K>
 class InstX8632ThreeAddressop : public InstX8632 {
   InstX8632ThreeAddressop() = delete;
@@ -1055,7 +1056,7 @@
   static const char *Opcode;
 };
 
-// Base class for assignment instructions
+/// Base class for assignment instructions
 template <InstX8632::InstKindX8632 K>
 class InstX8632Movlike : public InstX8632 {
   InstX8632Movlike() = delete;
@@ -1098,18 +1099,18 @@
 typedef InstX8632UnaryopGPR<InstX8632::Bsf> InstX8632Bsf;
 typedef InstX8632UnaryopGPR<InstX8632::Bsr> InstX8632Bsr;
 typedef InstX8632UnaryopGPR<InstX8632::Lea> InstX8632Lea;
-// Cbwdq instruction - wrapper for cbw, cwd, and cdq
+/// Cbwdq instruction - wrapper for cbw, cwd, and cdq
 typedef InstX8632UnaryopGPR<InstX8632::Cbwdq> InstX8632Cbwdq;
 typedef InstX8632UnaryopGPR<InstX8632::Movsx> InstX8632Movsx;
 typedef InstX8632UnaryopGPR<InstX8632::Movzx> InstX8632Movzx;
 typedef InstX8632UnaryopXmm<InstX8632::Movd> InstX8632Movd;
 typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss;
-// Move/assignment instruction - wrapper for mov/movss/movsd.
+/// Move/assignment instruction - wrapper for mov/movss/movsd.
 typedef InstX8632Movlike<InstX8632::Mov> InstX8632Mov;
-// Move packed - copy 128 bit values between XMM registers, or mem128
-// and XMM registers.
+/// Move packed - copy 128 bit values between XMM registers, or mem128
+/// and XMM registers.
 typedef InstX8632Movlike<InstX8632::Movp> InstX8632Movp;
-// Movq - copy between XMM registers, or mem64 and XMM registers.
+/// Movq - copy between XMM registers, or mem64 and XMM registers.
 typedef InstX8632Movlike<InstX8632::Movq> InstX8632Movq;
 typedef InstX8632BinopGPR<InstX8632::Add> InstX8632Add;
 typedef InstX8632BinopRMW<InstX8632::AddRMW> InstX8632AddRMW;
@@ -1151,13 +1152,13 @@
 typedef InstX8632BinopXmmShift<InstX8632::Psra> InstX8632Psra;
 typedef InstX8632BinopXmm<InstX8632::Pcmpeq, true> InstX8632Pcmpeq;
 typedef InstX8632BinopXmm<InstX8632::Pcmpgt, true> InstX8632Pcmpgt;
-// movss is only a binary operation when the source and dest
-// operands are both registers (the high bits of dest are left untouched).
-// In other cases, it behaves like a copy (mov-like) operation (and the
-// high bits of dest are cleared).
-// InstX8632Movss will assert that both its source and dest operands are
-// registers, so the lowering code should use _mov instead of _movss
-// in cases where a copy operation is intended.
+/// movss is only a binary operation when the source and dest
+/// operands are both registers (the high bits of dest are left untouched).
+/// In other cases, it behaves like a copy (mov-like) operation (and the
+/// high bits of dest are cleared).
+/// InstX8632Movss will assert that both its source and dest operands are
+/// registers, so the lowering code should use _mov instead of _movss
+/// in cases where a copy operation is intended.
 typedef InstX8632BinopXmm<InstX8632::MovssRegs, false> InstX8632MovssRegs;
 typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
 typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
@@ -1169,7 +1170,7 @@
 typedef InstX8632ThreeAddressop<InstX8632::Pextr> InstX8632Pextr;
 typedef InstX8632ThreeAddressop<InstX8632::Pshufd> InstX8632Pshufd;
 
-// Base class for a lockable x86-32 instruction (emits a locked prefix).
+/// Base class for a lockable x86-32 instruction (emits a locked prefix).
 class InstX8632Lockable : public InstX8632 {
   InstX8632Lockable() = delete;
   InstX8632Lockable(const InstX8632Lockable &) = delete;
@@ -1187,7 +1188,7 @@
   }
 };
 
-// Mul instruction - unsigned multiply.
+/// Mul instruction - unsigned multiply.
 class InstX8632Mul : public InstX8632 {
   InstX8632Mul() = delete;
   InstX8632Mul(const InstX8632Mul &) = delete;
@@ -1208,7 +1209,7 @@
   InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
 };
 
-// Shld instruction - shift across a pair of operands.
+/// Shld instruction - shift across a pair of operands.
 class InstX8632Shld : public InstX8632 {
   InstX8632Shld() = delete;
   InstX8632Shld(const InstX8632Shld &) = delete;
@@ -1230,7 +1231,7 @@
                 Variable *Source2);
 };
 
-// Shrd instruction - shift across a pair of operands.
+/// Shrd instruction - shift across a pair of operands.
 class InstX8632Shrd : public InstX8632 {
   InstX8632Shrd() = delete;
   InstX8632Shrd(const InstX8632Shrd &) = delete;
@@ -1252,7 +1253,7 @@
                 Variable *Source2);
 };
 
-// Conditional move instruction.
+/// Conditional move instruction.
 class InstX8632Cmov : public InstX8632 {
   InstX8632Cmov() = delete;
   InstX8632Cmov(const InstX8632Cmov &) = delete;
@@ -1276,8 +1277,8 @@
   X8632::Traits::Cond::BrCond Condition;
 };
 
-// Cmpps instruction - compare packed singled-precision floating point
-// values
+/// Cmpps instruction - compare packed single-precision floating point
+/// values
 class InstX8632Cmpps : public InstX8632 {
   InstX8632Cmpps() = delete;
   InstX8632Cmpps(const InstX8632Cmpps &) = delete;
@@ -1301,11 +1302,11 @@
   X8632::Traits::Cond::CmppsCond Condition;
 };
 
-// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
-// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
-// If not, ZF is cleared and <dest> is copied to eax (or subregister).
-// <dest> can be a register or memory, while <desired> must be a register.
-// It is the user's responsiblity to mark eax with a FakeDef.
+/// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
+/// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
+/// If not, ZF is cleared and <dest> is copied to eax (or subregister).
+/// <dest> can be a register or memory, while <desired> must be a register.
+/// It is the user's responsibility to mark eax with a FakeDef.
 class InstX8632Cmpxchg : public InstX8632Lockable {
   InstX8632Cmpxchg() = delete;
   InstX8632Cmpxchg(const InstX8632Cmpxchg &) = delete;
@@ -1327,12 +1328,12 @@
                    Variable *Desired, bool Locked);
 };
 
-// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
-// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
-// If not, ZF is cleared and <m64> is copied to edx:eax.
-// The caller is responsible for inserting FakeDefs to mark edx
-// and eax as modified.
-// <m64> must be a memory operand.
+/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
+/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
+/// If not, ZF is cleared and <m64> is copied to edx:eax.
+/// The caller is responsible for inserting FakeDefs to mark edx
+/// and eax as modified.
+/// <m64> must be a memory operand.
 class InstX8632Cmpxchg8b : public InstX8632Lockable {
   InstX8632Cmpxchg8b() = delete;
   InstX8632Cmpxchg8b(const InstX8632Cmpxchg8b &) = delete;
@@ -1355,10 +1356,10 @@
                      Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
 };
 
-// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
-// as appropriate.  s=float, d=double, i=int.  X and Y are determined
-// from dest/src types.  Sign and zero extension on the integer
-// operand needs to be done separately.
+/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
+/// as appropriate.  s=float, d=double, i=int.  X and Y are determined
+/// from dest/src types.  Sign and zero extension on the integer
+/// operand needs to be done separately.
 class InstX8632Cvt : public InstX8632 {
   InstX8632Cvt() = delete;
   InstX8632Cvt(const InstX8632Cvt &) = delete;
@@ -1382,7 +1383,7 @@
   InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
 };
 
-// cmp - Integer compare instruction.
+/// cmp - Integer compare instruction.
 class InstX8632Icmp : public InstX8632 {
   InstX8632Icmp() = delete;
   InstX8632Icmp(const InstX8632Icmp &) = delete;
@@ -1402,7 +1403,7 @@
   InstX8632Icmp(Cfg *Func, Operand *Src1, Operand *Src2);
 };
 
-// ucomiss/ucomisd - floating-point compare instruction.
+/// ucomiss/ucomisd - floating-point compare instruction.
 class InstX8632Ucomiss : public InstX8632 {
   InstX8632Ucomiss() = delete;
   InstX8632Ucomiss(const InstX8632Ucomiss &) = delete;
@@ -1422,7 +1423,7 @@
   InstX8632Ucomiss(Cfg *Func, Operand *Src1, Operand *Src2);
 };
 
-// UD2 instruction.
+/// UD2 instruction.
 class InstX8632UD2 : public InstX8632 {
   InstX8632UD2() = delete;
   InstX8632UD2(const InstX8632UD2 &) = delete;
@@ -1441,7 +1442,7 @@
   explicit InstX8632UD2(Cfg *Func);
 };
 
-// Test instruction.
+/// Test instruction.
 class InstX8632Test : public InstX8632 {
   InstX8632Test() = delete;
   InstX8632Test(const InstX8632Test &) = delete;
@@ -1461,7 +1462,7 @@
   InstX8632Test(Cfg *Func, Operand *Source1, Operand *Source2);
 };
 
-// Mfence instruction.
+/// Mfence instruction.
 class InstX8632Mfence : public InstX8632 {
   InstX8632Mfence() = delete;
   InstX8632Mfence(const InstX8632Mfence &) = delete;
@@ -1480,9 +1481,9 @@
   explicit InstX8632Mfence(Cfg *Func);
 };
 
-// This is essentially a "mov" instruction with an OperandX8632Mem
-// operand instead of Variable as the destination.  It's important
-// for liveness that there is no Dest operand.
+/// This is essentially a "mov" instruction with an OperandX8632Mem
+/// operand instead of Variable as the destination.  It's important
+/// for liveness that there is no Dest operand.
 class InstX8632Store : public InstX8632 {
   InstX8632Store() = delete;
   InstX8632Store(const InstX8632Store &) = delete;
@@ -1502,10 +1503,10 @@
   InstX8632Store(Cfg *Func, Operand *Value, OperandX8632 *Mem);
 };
 
-// This is essentially a vector "mov" instruction with an OperandX8632Mem
-// operand instead of Variable as the destination.  It's important
-// for liveness that there is no Dest operand. The source must be an
-// Xmm register, since Dest is mem.
+/// This is essentially a vector "mov" instruction with an OperandX8632Mem
+/// operand instead of Variable as the destination.  It's important
+/// for liveness that there is no Dest operand. The source must be an
+/// Xmm register, since Dest is mem.
 class InstX8632StoreP : public InstX8632 {
   InstX8632StoreP() = delete;
   InstX8632StoreP(const InstX8632StoreP &) = delete;
@@ -1546,7 +1547,7 @@
   InstX8632StoreQ(Cfg *Func, Variable *Value, OperandX8632Mem *Mem);
 };
 
-// Nop instructions of varying length
+/// Nop instructions of varying length
 class InstX8632Nop : public InstX8632 {
   InstX8632Nop() = delete;
   InstX8632Nop(const InstX8632Nop &) = delete;
@@ -1570,7 +1571,7 @@
   NopVariant Variant;
 };
 
-// Fld - load a value onto the x87 FP stack.
+/// Fld - load a value onto the x87 FP stack.
 class InstX8632Fld : public InstX8632 {
   InstX8632Fld() = delete;
   InstX8632Fld(const InstX8632Fld &) = delete;
@@ -1589,7 +1590,7 @@
   InstX8632Fld(Cfg *Func, Operand *Src);
 };
 
-// Fstp - store x87 st(0) into memory and pop st(0).
+/// Fstp - store x87 st(0) into memory and pop st(0).
 class InstX8632Fstp : public InstX8632 {
   InstX8632Fstp() = delete;
   InstX8632Fstp(const InstX8632Fstp &) = delete;
@@ -1644,10 +1645,10 @@
   InstX8632Push(Cfg *Func, Variable *Source);
 };
 
-// Ret instruction.  Currently only supports the "ret" version that
-// does not pop arguments.  This instruction takes a Source operand
-// (for non-void returning functions) for liveness analysis, though
-// a FakeUse before the ret would do just as well.
+/// Ret instruction.  Currently only supports the "ret" version that
+/// does not pop arguments.  This instruction takes a Source operand
+/// (for non-void returning functions) for liveness analysis, though
+/// a FakeUse before the ret would do just as well.
 class InstX8632Ret : public InstX8632 {
   InstX8632Ret() = delete;
   InstX8632Ret(const InstX8632Ret &) = delete;
@@ -1666,7 +1667,7 @@
   InstX8632Ret(Cfg *Func, Variable *Source);
 };
 
-// Conditional set-byte instruction.
+/// Conditional set-byte instruction.
 class InstX8632Setcc : public InstX8632 {
   InstX8632Setcc() = delete;
   InstX8632Setcc(const InstX8632Cmov &) = delete;
@@ -1689,13 +1690,13 @@
   const X8632::Traits::Cond::BrCond Condition;
 };
 
-// Exchanging Add instruction.  Exchanges the first operand (destination
-// operand) with the second operand (source operand), then loads the sum
-// of the two values into the destination operand. The destination may be
-// a register or memory, while the source must be a register.
-//
-// Both the dest and source are updated. The caller should then insert a
-// FakeDef to reflect the second udpate.
+/// Exchanging Add instruction.  Exchanges the first operand (destination
+/// operand) with the second operand (source operand), then loads the sum
+/// of the two values into the destination operand. The destination may be
+/// a register or memory, while the source must be a register.
+///
+/// Both the dest and source are updated. The caller should then insert a
+/// FakeDef to reflect the second update.
 class InstX8632Xadd : public InstX8632Lockable {
   InstX8632Xadd() = delete;
   InstX8632Xadd(const InstX8632Xadd &) = delete;
@@ -1716,12 +1717,12 @@
   InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
 };
 
-// Exchange instruction.  Exchanges the first operand (destination
-// operand) with the second operand (source operand). At least one of
-// the operands must be a register (and the other can be reg or mem).
-// Both the Dest and Source are updated. If there is a memory operand,
-// then the instruction is automatically "locked" without the need for
-// a lock prefix.
+/// Exchange instruction.  Exchanges the first operand (destination
+/// operand) with the second operand (source operand). At least one of
+/// the operands must be a register (and the other can be reg or mem).
+/// Both the Dest and Source are updated. If there is a memory operand,
+/// then the instruction is automatically "locked" without the need for
+/// a lock prefix.
 class InstX8632Xchg : public InstX8632 {
   InstX8632Xchg() = delete;
   InstX8632Xchg(const InstX8632Xchg &) = delete;
@@ -1741,9 +1742,9 @@
   InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source);
 };
 
-// Declare partial template specializations of emit() methods that
-// already have default implementations.  Without this, there is the
-// possibility of ODR violations and link errors.
+/// Declare explicit template specializations of emit() methods that
+/// already have default implementations.  Without this, there is the
+/// possibility of ODR violations and link errors.
 template <> void InstX8632Addss::emit(const Cfg *Func) const;
 template <> void InstX8632Blendvps::emit(const Cfg *Func) const;
 template <> void InstX8632Cbwdq::emit(const Cfg *Func) const;
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 307745e..b1ca40d 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Intrinsics utilities for matching and
-// then dispatching by name.
-//
+///
+/// \file
+/// This file implements the Intrinsics utilities for matching and
+/// then dispatching by name.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceIntrinsics.h"
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 938243f..674c9ab 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the kinds of intrinsics supported by PNaCl.
-//
+///
+/// \file
+/// This file declares the kinds of intrinsics supported by PNaCl.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINTRINSICS_H
@@ -31,9 +32,9 @@
   Intrinsics();
   ~Intrinsics();
 
-  // Some intrinsics allow overloading by type. This enum collapses all
-  // overloads into a single ID, but the type can still be recovered by the
-  // type of the intrinsic function call's return value and parameters.
+  /// Some intrinsics allow overloading by type. This enum collapses all
+  /// overloads into a single ID, but the type can still be recovered by the
+  /// type of the intrinsic function call's return value and parameters.
   enum IntrinsicID {
     UnknownIntrinsic = 0,
     // Arbitrary (alphabetical) order.
@@ -92,9 +93,9 @@
     MemoryOrderNum // Invalid, keep last.
   };
 
-  // Verify memory ordering rules for atomic intrinsics.  For
-  // AtomicCmpxchg, Order is the "success" ordering and OrderOther is
-  // the "failure" ordering.  Returns true if valid, false if invalid.
+  /// Verify memory ordering rules for atomic intrinsics.  For
+  /// AtomicCmpxchg, Order is the "success" ordering and OrderOther is
+  /// the "failure" ordering.  Returns true if valid, false if invalid.
   // TODO(stichnot,kschimpf): Perform memory order validation in the
   // bitcode reader/parser, allowing LLVM and Subzero to share.  See
   // https://code.google.com/p/nativeclient/issues/detail?id=4126 .
@@ -105,61 +106,61 @@
 
   enum ReturnsTwice { ReturnsTwice_F = 0, ReturnsTwice_T = 1 };
 
-  // Basic attributes related to each intrinsic, that are relevant to
-  // code generation. Perhaps the attributes representation can be shared
-  // with general function calls, but PNaCl currently strips all
-  // attributes from functions.
+  /// Basic attributes related to each intrinsic, that are relevant to
+  /// code generation. Perhaps the attributes representation can be shared
+  /// with general function calls, but PNaCl currently strips all
+  /// attributes from functions.
   struct IntrinsicInfo {
     enum IntrinsicID ID : 30;
     enum SideEffects HasSideEffects : 1;
     enum ReturnsTwice ReturnsTwice : 1;
   };
 
-  // The types of validation values for FullIntrinsicInfo.validateCall.
+  /// The types of validation values for FullIntrinsicInfo.validateCall.
   enum ValidateCallValue {
-    IsValidCall,      // Valid use of instrinsic call.
-    BadReturnType,    // Return type invalid for intrinsic.
-    WrongNumOfArgs,   // Wrong number of arguments for intrinsic.
-    WrongCallArgType, // Argument of wrong type.
+    IsValidCall,      ///< Valid use of intrinsic call.
+    BadReturnType,    ///< Return type invalid for intrinsic.
+    WrongNumOfArgs,   ///< Wrong number of arguments for intrinsic.
+    WrongCallArgType, ///< Argument of wrong type.
   };
 
-  // The complete set of information about an intrinsic.
+  /// The complete set of information about an intrinsic.
   struct FullIntrinsicInfo {
-    struct IntrinsicInfo Info; // Information that CodeGen would care about.
+    struct IntrinsicInfo Info; ///< Information that CodeGen would care about.
 
     // Sanity check during parsing.
     Type Signature[kMaxIntrinsicParameters];
     uint8_t NumTypes;
 
-    // Validates that type signature of call matches intrinsic.
-    // If WrongArgumentType is returned, ArgIndex is set to corresponding
-    // argument index.
+    /// Validates that type signature of call matches intrinsic.
+    /// If WrongCallArgType is returned, ArgIndex is set to corresponding
+    /// argument index.
     ValidateCallValue validateCall(const Ice::InstCall *Call,
                                    SizeT &ArgIndex) const;
 
-    // Returns the return type of the intrinsic.
+    /// Returns the return type of the intrinsic.
     Type getReturnType() const {
       assert(NumTypes > 1);
       return Signature[0];
     }
 
-    // Returns number of arguments expected.
+    /// Returns number of arguments expected.
     SizeT getNumArgs() const {
       assert(NumTypes > 1);
       return NumTypes - 1;
     }
 
-    // Returns type of Index-th argument.
+    /// Returns type of Index-th argument.
     Type getArgType(SizeT Index) const;
   };
 
-  // Find the information about a given intrinsic, based on function name.  If
-  // the function name does not have the common "llvm." prefix, nullptr is
-  // returned and Error is set to false.  Otherwise, tries to find a reference
-  // to a FullIntrinsicInfo entry (valid for the lifetime of the map).  If
-  // found, sets Error to false and returns the reference.  If not found, sets
-  // Error to true and returns nullptr (indicating an unknown "llvm.foo"
-  // intrinsic).
+  /// Find the information about a given intrinsic, based on function name.  If
+  /// the function name does not have the common "llvm." prefix, nullptr is
+  /// returned and Error is set to false.  Otherwise, tries to find a reference
+  /// to a FullIntrinsicInfo entry (valid for the lifetime of the map).  If
+  /// found, sets Error to false and returns the reference.  If not found, sets
+  /// Error to true and returns nullptr (indicating an unknown "llvm.foo"
+  /// intrinsic).
   const FullIntrinsicInfo *find(const IceString &Name, bool &Error) const;
 
 private:
diff --git a/src/IceLiveness.cpp b/src/IceLiveness.cpp
index f3c4516..551a400 100644
--- a/src/IceLiveness.cpp
+++ b/src/IceLiveness.cpp
@@ -6,17 +6,18 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file provides some of the support for the Liveness class.  In
-// particular, it handles the sparsity representation of the mapping
-// between Variables and CfgNodes.  The idea is that since most
-// variables are used only within a single basic block, we can
-// partition the variables into "local" and "global" sets.  Instead of
-// sizing and indexing vectors according to Variable::Number, we
-// create a mapping such that global variables are mapped to low
-// indexes that are common across nodes, and local variables are
-// mapped to a higher index space that is shared across nodes.
-//
+///
+/// \file
+/// This file provides some of the support for the Liveness class.  In
+/// particular, it handles the sparsity representation of the mapping
+/// between Variables and CfgNodes.  The idea is that since most
+/// variables are used only within a single basic block, we can
+/// partition the variables into "local" and "global" sets.  Instead of
+/// sizing and indexing vectors according to Variable::Number, we
+/// create a mapping such that global variables are mapped to low
+/// indexes that are common across nodes, and local variables are
+/// mapped to a higher index space that is shared across nodes.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceLiveness.h"
diff --git a/src/IceLiveness.h b/src/IceLiveness.h
index 03c63ad..7fcad25 100644
--- a/src/IceLiveness.h
+++ b/src/IceLiveness.h
@@ -6,15 +6,16 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the Liveness and LivenessNode classes,
-// which are used for liveness analysis.  The node-specific
-// information tracked for each Variable includes whether it is
-// live on entry, whether it is live on exit, the instruction number
-// that starts its live range, and the instruction number that ends
-// its live range.  At the Cfg level, the actual live intervals are
-// recorded.
-//
+///
+/// \file
+/// This file declares the Liveness and LivenessNode classes,
+/// which are used for liveness analysis.  The node-specific
+/// information tracked for each Variable includes whether it is
+/// live on entry, whether it is live on exit, the instruction number
+/// that starts its live range, and the instruction number that ends
+/// its live range.  At the Cfg level, the actual live intervals are
+/// recorded.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICELIVENESS_H
@@ -37,12 +38,12 @@
   public:
     LivenessNode() = default;
     LivenessNode(const LivenessNode &) = default;
-    // NumLocals is the number of Variables local to this block.
+    /// NumLocals is the number of Variables local to this block.
     SizeT NumLocals = 0;
-    // NumNonDeadPhis tracks the number of Phi instructions that
-    // Inst::liveness() identified as tentatively live.  If
-    // NumNonDeadPhis changes from the last liveness pass, then liveness
-    // has not yet converged.
+    /// NumNonDeadPhis tracks the number of Phi instructions that
+    /// Inst::liveness() identified as tentatively live.  If
+    /// NumNonDeadPhis changes from the last liveness pass, then liveness
+    /// has not yet converged.
     SizeT NumNonDeadPhis = 0;
     // LiveToVarMap maps a liveness bitvector index to a Variable.  This
     // is generally just for printing/dumping.  The index should be less
@@ -95,7 +96,7 @@
   }
 
 private:
-  // Resize Nodes so that Nodes[Index] is valid.
+  /// Resize Nodes so that Nodes[Index] is valid.
   void resize(SizeT Index) {
     if (Index >= Nodes.size())
       Nodes.resize(Index + 1);
@@ -103,13 +104,13 @@
   Cfg *Func;
   LivenessMode Mode;
   SizeT NumGlobals = 0;
-  // Size of Nodes is Cfg::Nodes.size().
+  /// Size of Nodes is Cfg::Nodes.size().
   std::vector<LivenessNode> Nodes;
-  // VarToLiveMap maps a Variable's Variable::Number to its live index
-  // within its basic block.
+  /// VarToLiveMap maps a Variable's Variable::Number to its live index
+  /// within its basic block.
   std::vector<SizeT> VarToLiveMap;
-  // LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
-  // non-local variables.
+  /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
+  /// non-local variables.
   std::vector<Variable *> LiveToVarMap;
 };
 
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 60a73d9..0d739ca 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the Operand class and its target-independent
-// subclasses, primarily for the methods of the Variable class.
-//
+///
+/// \file
+/// This file implements the Operand class and its target-independent
+/// subclasses, primarily for the methods of the Variable class.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceOperand.h"
diff --git a/src/IceOperand.h b/src/IceOperand.h
index b43e2a8..8db3cf1 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -6,13 +6,14 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the Operand class and its target-independent
-// subclasses.  The main classes are Variable, which represents an
-// LLVM variable that is either register- or stack-allocated, and the
-// Constant hierarchy, which represents integer, floating-point,
-// and/or symbolic constants.
-//
+///
+/// \file
+/// This file declares the Operand class and its target-independent
+/// subclasses.  The main classes are Variable, which represents an
+/// LLVM variable that is either register- or stack-allocated, and the
+/// Constant hierarchy, which represents integer, floating-point,
+/// and/or symbolic constants.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEOPERAND_H
@@ -55,18 +56,22 @@
   OperandKind getKind() const { return Kind; }
   Type getType() const { return Ty; }
 
-  // Every Operand keeps an array of the Variables referenced in
-  // the operand.  This is so that the liveness operations can get
-  // quick access to the variables of interest, without having to dig
-  // so far into the operand.
+  /// Every Operand keeps an array of the Variables referenced in
+  /// the operand.  This is so that the liveness operations can get
+  /// quick access to the variables of interest, without having to dig
+  /// so far into the operand.
   SizeT getNumVars() const { return NumVars; }
   Variable *getVar(SizeT I) const {
     assert(I < getNumVars());
     return Vars[I];
   }
   virtual void emit(const Cfg *Func) const = 0;
-  // The dump(Func,Str) implementation must be sure to handle the
-  // situation where Func==nullptr.
+
+  /// \name Dumping functions.
+  /// @{
+
+  /// The dump(Func,Str) implementation must be sure to handle the
+  /// situation where Func==nullptr.
   virtual void dump(const Cfg *Func, Ostream &Str) const = 0;
   void dump(const Cfg *Func) const {
     if (!BuildDefs::dump())
@@ -78,6 +83,7 @@
     if (BuildDefs::dump())
       dump(nullptr, Str);
   }
+  /// @}
 
 protected:
   Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) {}
@@ -85,7 +91,7 @@
 
   const Type Ty;
   const OperandKind Kind;
-  // Vars and NumVars are initialized by the derived class.
+  /// Vars and NumVars are initialized by the derived class.
   SizeT NumVars = 0;
   Variable **Vars = nullptr;
 };
@@ -96,8 +102,8 @@
   return Str;
 }
 
-// Constant is the abstract base class for constants.  All
-// constants are allocated from a global arena and are pooled.
+/// Constant is the abstract base class for constants.  All
+/// constants are allocated from a global arena and are pooled.
 class Constant : public Operand {
   Constant() = delete;
   Constant(const Constant &) = delete;
@@ -115,9 +121,9 @@
     return Kind >= kConst_Base && Kind <= kConst_Num;
   }
 
-  // Judge if this given immediate should be randomized or pooled
-  // By default should return false, only constant integers should
-  // truly go through this method.
+  /// Judge if this given immediate should be randomized or pooled.
+  /// By default should return false, only constant integers should
+  /// truly go through this method.
   virtual bool shouldBeRandomizedOrPooled(const GlobalContext *Ctx) {
     (void)Ctx;
     return false;
@@ -133,16 +139,16 @@
     Vars = nullptr;
     NumVars = 0;
   }
-  // PoolEntryID is an integer that uniquely identifies the constant
-  // within its constant pool.  It is used for building the constant
-  // pool in the object code and for referencing its entries.
+  /// PoolEntryID is an integer that uniquely identifies the constant
+  /// within its constant pool.  It is used for building the constant
+  /// pool in the object code and for referencing its entries.
   const uint32_t PoolEntryID;
-  // Whether we should pool this constant. Usually Float/Double and pooled
-  // Integers should be flagged true.
+  /// Whether we should pool this constant. Usually Float/Double and pooled
+  /// Integers should be flagged true.
   bool shouldBePooled;
 };
 
-// ConstantPrimitive<> wraps a primitive type.
+/// ConstantPrimitive<> wraps a primitive type.
 template <typename T, Operand::OperandKind K>
 class ConstantPrimitive : public Constant {
   ConstantPrimitive() = delete;
@@ -198,7 +204,7 @@
     Str << static_cast<int32_t>(getValue());
 }
 
-// Specialization of the template member function for ConstantInteger32
+/// Specialization of the template member function for ConstantInteger32
 template <>
 bool ConstantInteger32::shouldBeRandomizedOrPooled(const GlobalContext *Ctx);
 
@@ -210,10 +216,10 @@
   Str << static_cast<int64_t>(getValue());
 }
 
-// RelocatableTuple bundles the parameters that are used to
-// construct an ConstantRelocatable.  It is done this way so that
-// ConstantRelocatable can fit into the global constant pool
-// template mechanism.
+/// RelocatableTuple bundles the parameters that are used to
+/// construct a ConstantRelocatable.  It is done this way so that
+/// ConstantRelocatable can fit into the global constant pool
+/// template mechanism.
 class RelocatableTuple {
   RelocatableTuple() = delete;
   RelocatableTuple &operator=(const RelocatableTuple &) = delete;
@@ -231,8 +237,8 @@
 
 bool operator==(const RelocatableTuple &A, const RelocatableTuple &B);
 
-// ConstantRelocatable represents a symbolic constant combined with
-// a fixed offset.
+/// ConstantRelocatable represents a symbolic constant combined with
+/// a fixed offset.
 class ConstantRelocatable : public Constant {
   ConstantRelocatable() = delete;
   ConstantRelocatable(const ConstantRelocatable &) = delete;
@@ -268,14 +274,14 @@
                       bool SuppressMangling, uint32_t PoolEntryID)
       : Constant(kConstRelocatable, Ty, PoolEntryID), Offset(Offset),
         Name(Name), SuppressMangling(SuppressMangling) {}
-  const RelocOffsetT Offset; // fixed offset to add
-  const IceString Name;      // optional for debug/dump
+  const RelocOffsetT Offset; ///< fixed offset to add
+  const IceString Name;      ///< optional for debug/dump
   bool SuppressMangling;
 };
 
-// ConstantUndef represents an unspecified bit pattern. Although it is
-// legal to lower ConstantUndef to any value, backends should try to
-// make code generation deterministic by lowering ConstantUndefs to 0.
+/// ConstantUndef represents an unspecified bit pattern. Although it is
+/// legal to lower ConstantUndef to any value, backends should try to
+/// make code generation deterministic by lowering ConstantUndefs to 0.
 class ConstantUndef : public Constant {
   ConstantUndef() = delete;
   ConstantUndef(const ConstantUndef &) = delete;
@@ -306,17 +312,17 @@
       : Constant(kConstUndef, Ty, PoolEntryID) {}
 };
 
-// RegWeight is a wrapper for a uint32_t weight value, with a
-// special value that represents infinite weight, and an addWeight()
-// method that ensures that W+infinity=infinity.
+/// RegWeight is a wrapper for a uint32_t weight value, with a
+/// special value that represents infinite weight, and an addWeight()
+/// method that ensures that W+infinity=infinity.
 class RegWeight {
 public:
   RegWeight() = default;
   explicit RegWeight(uint32_t Weight) : Weight(Weight) {}
   RegWeight(const RegWeight &) = default;
   RegWeight &operator=(const RegWeight &) = default;
-  const static uint32_t Inf = ~0; // Force regalloc to give a register
-  const static uint32_t Zero = 0; // Force regalloc NOT to give a register
+  const static uint32_t Inf = ~0; ///< Force regalloc to give a register
+  const static uint32_t Zero = 0; ///< Force regalloc NOT to give a register
   void addWeight(uint32_t Delta) {
     if (Delta == Inf)
       Weight = Inf;
@@ -337,17 +343,17 @@
 bool operator<=(const RegWeight &A, const RegWeight &B);
 bool operator==(const RegWeight &A, const RegWeight &B);
 
-// LiveRange is a set of instruction number intervals representing
-// a variable's live range.  Generally there is one interval per basic
-// block where the variable is live, but adjacent intervals get
-// coalesced into a single interval.  LiveRange also includes a
-// weight, in case e.g. we want a live range to have higher weight
-// inside a loop.
+/// LiveRange is a set of instruction number intervals representing
+/// a variable's live range.  Generally there is one interval per basic
+/// block where the variable is live, but adjacent intervals get
+/// coalesced into a single interval.  LiveRange also includes a
+/// weight, in case e.g. we want a live range to have higher weight
+/// inside a loop.
 class LiveRange {
 public:
   LiveRange() = default;
-  // Special constructor for building a kill set.  The advantage is
-  // that we can reserve the right amount of space in advance.
+  /// Special constructor for building a kill set.  The advantage is
+  /// that we can reserve the right amount of space in advance.
   explicit LiveRange(const std::vector<InstNumberT> &Kills) {
     Range.reserve(Kills.size());
     for (InstNumberT I : Kills)
@@ -382,27 +388,27 @@
 
 private:
   typedef std::pair<InstNumberT, InstNumberT> RangeElementType;
-  // RangeType is arena-allocated from the Cfg's allocator.
+  /// RangeType is arena-allocated from the Cfg's allocator.
   typedef std::vector<RangeElementType, CfgLocalAllocator<RangeElementType>>
       RangeType;
   RangeType Range;
   RegWeight Weight = RegWeight(0);
-  // TrimmedBegin is an optimization for the overlaps() computation.
-  // Since the linear-scan algorithm always calls it as overlaps(Cur)
-  // and Cur advances monotonically according to live range start, we
-  // can optimize overlaps() by ignoring all segments that end before
-  // the start of Cur's range.  The linear-scan code enables this by
-  // calling trim() on the ranges of interest as Cur advances.  Note
-  // that linear-scan also has to initialize TrimmedBegin at the
-  // beginning by calling untrim().
+  /// TrimmedBegin is an optimization for the overlaps() computation.
+  /// Since the linear-scan algorithm always calls it as overlaps(Cur)
+  /// and Cur advances monotonically according to live range start, we
+  /// can optimize overlaps() by ignoring all segments that end before
+  /// the start of Cur's range.  The linear-scan code enables this by
+  /// calling trim() on the ranges of interest as Cur advances.  Note
+  /// that linear-scan also has to initialize TrimmedBegin at the
+  /// beginning by calling untrim().
   RangeType::const_iterator TrimmedBegin;
 };
 
 Ostream &operator<<(Ostream &Str, const LiveRange &L);
 
-// Variable represents an operand that is register-allocated or
-// stack-allocated.  If it is register-allocated, it will ultimately
-// have a non-negative RegNum field.
+/// Variable represents an operand that is register-allocated or
+/// stack-allocated.  If it is register-allocated, it will ultimately
+/// have a non-negative RegNum field.
 class Variable : public Operand {
   Variable() = delete;
   Variable(const Variable &) = delete;
@@ -486,11 +492,11 @@
     LoVar = Lo;
     HiVar = Hi;
   }
-  // Creates a temporary copy of the variable with a different type.
-  // Used primarily for syntactic correctness of textual assembly
-  // emission.  Note that only basic information is copied, in
-  // particular not IsArgument, IsImplicitArgument, IgnoreLiveness,
-  // RegNumTmp, Weight, Live, LoVar, HiVar, VarsReal.
+  /// Creates a temporary copy of the variable with a different type.
+  /// Used primarily for syntactic correctness of textual assembly
+  /// emission.  Note that only basic information is copied, in
+  /// particular not IsArgument, IsImplicitArgument, IgnoreLiveness,
+  /// RegNumTmp, Weight, Live, LoVar, HiVar, VarsReal.
   Variable *asType(Type Ty);
 
   void emit(const Cfg *Func) const override;
@@ -509,23 +515,23 @@
     Vars[0] = this;
     NumVars = 1;
   }
-  // Number is unique across all variables, and is used as a
-  // (bit)vector index for liveness analysis.
+  /// Number is unique across all variables, and is used as a
+  /// (bit)vector index for liveness analysis.
   const SizeT Number;
   Cfg::IdentifierIndexType NameIndex = Cfg::IdentifierIndexInvalid;
   bool IsArgument = false;
   bool IsImplicitArgument = false;
-  // IgnoreLiveness means that the variable should be ignored when
-  // constructing and validating live ranges.  This is usually
-  // reserved for the stack pointer.
+  /// IgnoreLiveness means that the variable should be ignored when
+  /// constructing and validating live ranges.  This is usually
+  /// reserved for the stack pointer.
   bool IgnoreLiveness = false;
-  // StackOffset is the canonical location on stack (only if
-  // RegNum==NoRegister || IsArgument).
+  /// StackOffset is the canonical location on stack (only if
+  /// RegNum==NoRegister || IsArgument).
   int32_t StackOffset = 0;
-  // RegNum is the allocated register, or NoRegister if it isn't
-  // register-allocated.
+  /// RegNum is the allocated register, or NoRegister if it isn't
+  /// register-allocated.
   int32_t RegNum = NoRegister;
-  // RegNumTmp is the tentative assignment during register allocation.
+  /// RegNumTmp is the tentative assignment during register allocation.
   int32_t RegNumTmp = NoRegister;
   RegWeight Weight = RegWeight(1); // Register allocation priority
   LiveRange Live;
@@ -538,20 +544,20 @@
   // wasteful for a 64-bit target.
   Variable *LoVar = nullptr;
   Variable *HiVar = nullptr;
-  // VarsReal (and Operand::Vars) are set up such that Vars[0] ==
-  // this.
+  /// VarsReal (and Operand::Vars) are set up such that Vars[0] ==
+  /// this.
   Variable *VarsReal[1];
 };
 
 enum MetadataKind {
-  VMK_Uses,       // Track only uses, not defs
-  VMK_SingleDefs, // Track uses+defs, but only record single def
-  VMK_All         // Track uses+defs, including full def list
+  VMK_Uses,       ///< Track only uses, not defs
+  VMK_SingleDefs, ///< Track uses+defs, but only record single def
+  VMK_All         ///< Track uses+defs, including full def list
 };
 typedef std::vector<const Inst *, CfgLocalAllocator<const Inst *>> InstDefList;
 
-// VariableTracking tracks the metadata for a single variable.  It is
-// only meant to be used internally by VariablesMetadata.
+/// VariableTracking tracks the metadata for a single variable.  It is
+/// only meant to be used internally by VariablesMetadata.
 class VariableTracking {
   VariableTracking &operator=(const VariableTracking &) = delete;
 
@@ -582,15 +588,15 @@
   MultiBlockState MultiBlock = MBS_Unknown;
   const CfgNode *SingleUseNode = nullptr;
   const CfgNode *SingleDefNode = nullptr;
-  // All definitions of the variable are collected here, in increasing
-  // order of instruction number.
-  InstDefList Definitions; // Only used if Kind==VMK_All
+  /// All definitions of the variable are collected here, in increasing
+  /// order of instruction number.
+  InstDefList Definitions; ///< Only used if Kind==VMK_All
   const Inst *FirstOrSingleDefinition =
-      nullptr; // Is a copy of Definitions[0] if Kind==VMK_All
+      nullptr; ///< Is a copy of Definitions[0] if Kind==VMK_All
 };
 
-// VariablesMetadata analyzes and summarizes the metadata for the
-// complete set of Variables.
+/// VariablesMetadata analyzes and summarizes the metadata for the
+/// complete set of Variables.
 class VariablesMetadata {
   VariablesMetadata() = delete;
   VariablesMetadata(const VariablesMetadata &) = delete;
@@ -598,47 +604,47 @@
 
 public:
   explicit VariablesMetadata(const Cfg *Func) : Func(Func) {}
-  // Initialize the state by traversing all instructions/variables in
-  // the CFG.
+  /// Initialize the state by traversing all instructions/variables in
+  /// the CFG.
   void init(MetadataKind TrackingKind);
-  // Add a single node.  This is called by init(), and can be called
-  // incrementally from elsewhere, e.g. after edge-splitting.
+  /// Add a single node.  This is called by init(), and can be called
+  /// incrementally from elsewhere, e.g. after edge-splitting.
   void addNode(CfgNode *Node);
-  // Returns whether the given Variable is tracked in this object.  It
-  // should only return false if changes were made to the CFG after
-  // running init(), in which case the state is stale and the results
-  // shouldn't be trusted (but it may be OK e.g. for dumping).
+  /// Returns whether the given Variable is tracked in this object.  It
+  /// should only return false if changes were made to the CFG after
+  /// running init(), in which case the state is stale and the results
+  /// shouldn't be trusted (but it may be OK e.g. for dumping).
   bool isTracked(const Variable *Var) const {
     return Var->getIndex() < Metadata.size();
   }
 
-  // Returns whether the given Variable has multiple definitions.
+  /// Returns whether the given Variable has multiple definitions.
   bool isMultiDef(const Variable *Var) const;
-  // Returns the first definition instruction of the given Variable.
-  // This is only valid for variables whose definitions are all within
-  // the same block, e.g. T after the lowered sequence "T=B; T+=C;
-  // A=T", for which getFirstDefinition(T) would return the "T=B"
-  // instruction.  For variables with definitions span multiple
-  // blocks, nullptr is returned.
+  /// Returns the first definition instruction of the given Variable.
+  /// This is only valid for variables whose definitions are all within
+  /// the same block, e.g. T after the lowered sequence "T=B; T+=C;
+  /// A=T", for which getFirstDefinition(T) would return the "T=B"
+  /// instruction.  For variables whose definitions span multiple
+  /// blocks, nullptr is returned.
   const Inst *getFirstDefinition(const Variable *Var) const;
-  // Returns the definition instruction of the given Variable, when
-  // the variable has exactly one definition.  Otherwise, nullptr is
-  // returned.
+  /// Returns the definition instruction of the given Variable, when
+  /// the variable has exactly one definition.  Otherwise, nullptr is
+  /// returned.
   const Inst *getSingleDefinition(const Variable *Var) const;
-  // Returns the list of all definition instructions of the given
-  // Variable.
+  /// Returns the list of all definition instructions of the given
+  /// Variable.
   const InstDefList &getLatterDefinitions(const Variable *Var) const;
 
-  // Returns whether the given Variable is live across multiple
-  // blocks.  Mainly, this is used to partition Variables into
-  // single-block versus multi-block sets for leveraging sparsity in
-  // liveness analysis, and for implementing simple stack slot
-  // coalescing.  As a special case, function arguments are always
-  // considered multi-block because they are live coming into the
-  // entry block.
+  /// Returns whether the given Variable is live across multiple
+  /// blocks.  Mainly, this is used to partition Variables into
+  /// single-block versus multi-block sets for leveraging sparsity in
+  /// liveness analysis, and for implementing simple stack slot
+  /// coalescing.  As a special case, function arguments are always
+  /// considered multi-block because they are live coming into the
+  /// entry block.
   bool isMultiBlock(const Variable *Var) const;
-  // Returns the node that the given Variable is used in, assuming
-  // isMultiBlock() returns false.  Otherwise, nullptr is returned.
+  /// Returns the node that the given Variable is used in, assuming
+  /// isMultiBlock() returns false.  Otherwise, nullptr is returned.
   const CfgNode *getLocalUseNode(const Variable *Var) const;
 
 private:
diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp
index e3fa8c5..a6b9adf 100644
--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the random number generator.
-//
+///
+/// \file
+/// This file implements the random number generator.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceRNG.h"
diff --git a/src/IceRNG.h b/src/IceRNG.h
index 83001ad..5ddd97f 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares a random number generator.
-//
+///
+/// \file
+/// This file declares a random number generator.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICERNG_H
@@ -33,9 +34,9 @@
   uint64_t State;
 };
 
-// This class adds additional random number generator utilities. The
-// reason for the wrapper class is that we want to keep the
-// RandomNumberGenerator interface identical to LLVM's.
+/// This class adds additional random number generator utilities. The
+/// reason for the wrapper class is that we want to keep the
+/// RandomNumberGenerator interface identical to LLVM's.
 class RandomNumberGeneratorWrapper {
   RandomNumberGeneratorWrapper() = delete;
   RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &) = delete;
@@ -52,9 +53,9 @@
   RandomNumberGenerator &RNG;
 };
 
-// RandomShuffle is an implementation of std::random_shuffle() that
-// doesn't change across stdlib implementations.  Adapted from a
-// sample implementation at cppreference.com.
+/// RandomShuffle is an implementation of std::random_shuffle() that
+/// doesn't change across stdlib implementations.  Adapted from a
+/// sample implementation at cppreference.com.
 template <class RandomIt, class RandomFunc>
 void RandomShuffle(RandomIt First, RandomIt Last, RandomFunc &&RNG) {
   for (auto i = Last - First - 1; i > 0; --i)
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 1478b0f..400cb7d 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the LinearScan class, which performs the
-// linear-scan register allocation after liveness analysis has been
-// performed.
-//
+///
+/// \file
+/// This file implements the LinearScan class, which performs the
+/// linear-scan register allocation after liveness analysis has been
+/// performed.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceRegAlloc.h"
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 1658a7d..89ae715 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the LinearScan data structure used during
-// linear-scan register allocation, which holds the various work
-// queues for the linear-scan algorithm.
-//
+///
+/// \file
+/// This file declares the LinearScan data structure used during
+/// linear-scan register allocation, which holds the various work
+/// queues for the linear-scan algorithm.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEREGALLOC_H
@@ -38,12 +39,12 @@
 
   void initForGlobal();
   void initForInfOnly();
-  // Move an item from the From set to the To set.  From[Index] is
-  // pushed onto the end of To[], then the item is efficiently removed
-  // from From[] by effectively swapping it with the last item in
-  // From[] and then popping it from the back.  As such, the caller is
-  // best off iterating over From[] in reverse order to avoid the need
-  // for special handling of the iterator.
+  /// Move an item from the From set to the To set.  From[Index] is
+  /// pushed onto the end of To[], then the item is efficiently removed
+  /// from From[] by effectively swapping it with the last item in
+  /// From[] and then popping it from the back.  As such, the caller is
+  /// best off iterating over From[] in reverse order to avoid the need
+  /// for special handling of the iterator.
   void moveItem(UnorderedRanges &From, SizeT Index, UnorderedRanges &To) {
     To.push_back(From[Index]);
     From[Index] = From.back();
@@ -52,8 +53,8 @@
 
   Cfg *const Func;
   OrderedRanges Unhandled;
-  // UnhandledPrecolored is a subset of Unhandled, specially collected
-  // for faster processing.
+  /// UnhandledPrecolored is a subset of Unhandled, specially collected
+  /// for faster processing.
   OrderedRanges UnhandledPrecolored;
   UnorderedRanges Active, Inactive, Handled;
   std::vector<InstNumberT> Kills;
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h
index 2ad1c8b..39efd7f 100644
--- a/src/IceRegistersARM32.h
+++ b/src/IceRegistersARM32.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the registers and their encodings for ARM32.
-//
+///
+/// \file
+/// This file declares the registers and their encodings for ARM32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEREGISTERSARM32_H
@@ -22,8 +23,8 @@
 
 namespace RegARM32 {
 
-// An enum of every register. The enum value may not match the encoding
-// used to binary encode register operands in instructions.
+/// An enum of every register. The enum value may not match the encoding
+/// used to binary encode register operands in instructions.
 enum AllRegisters {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP)                                                                \
@@ -36,8 +37,8 @@
 #undef X
 };
 
-// An enum of GPR Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
+/// An enum of GPR Registers. The enum value does match the encoding used
+/// to binary encode register operands in instructions.
 enum GPRRegister {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP)                                                                \
diff --git a/src/IceRegistersMIPS32.h b/src/IceRegistersMIPS32.h
index d2180bf..8c75488 100644
--- a/src/IceRegistersMIPS32.h
+++ b/src/IceRegistersMIPS32.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the registers and their encodings for MIPS32.
-//
+///
+/// \file
+/// This file declares the registers and their encodings for MIPS32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEREGISTERSMIPS32_H
@@ -22,8 +23,8 @@
 
 namespace RegMIPS32 {
 
-// An enum of every register. The enum value may not match the encoding
-// used to binary encode register operands in instructions.
+/// An enum of every register. The enum value may not match the encoding
+/// used to binary encode register operands in instructions.
 enum AllRegisters {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP)                                                                \
@@ -36,8 +37,8 @@
 #undef X
 };
 
-// An enum of GPR Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
+/// An enum of GPR Registers. The enum value does match the encoding used
+/// to binary encode register operands in instructions.
 enum GPRRegister {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP)                                                                \
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index d7bfd6f..7cce661 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the registers and their encodings for x86-32.
-//
+///
+/// \file
+/// This file declares the registers and their encodings for x86-32.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEREGISTERSX8632_H
@@ -22,8 +23,8 @@
 
 class RegX8632 {
 public:
-  // An enum of every register. The enum value may not match the encoding
-  // used to binary encode register operands in instructions.
+  /// An enum of every register. The enum value may not match the encoding
+  /// used to binary encode register operands in instructions.
   enum AllRegisters {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -36,8 +37,8 @@
 #undef X
   };
 
-  // An enum of GPR Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of GPR Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum GPRRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -47,8 +48,8 @@
         Encoded_Not_GPR = -1
   };
 
-  // An enum of XMM Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of XMM Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum XmmRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -58,8 +59,8 @@
         Encoded_Not_Xmm = -1
   };
 
-  // An enum of Byte Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of Byte Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
     REGX8632_BYTEREG_TABLE
@@ -67,8 +68,8 @@
         Encoded_Not_ByteReg = -1
   };
 
-  // An enum of X87 Stack Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of X87 Stack Registers. The enum value does match the encoding
+  /// used to binary encode register operands in instructions.
   enum X87STRegister {
 #define X(val, encode, name) Encoded_##val encode,
     X87ST_REGX8632_TABLE
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h
index ceafc86..3e4c868 100644
--- a/src/IceRegistersX8664.h
+++ b/src/IceRegistersX8664.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the registers and their encodings for x86-64.
-//
+///
+/// \file
+/// This file declares the registers and their encodings for x86-64.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEREGISTERSX8664_H
@@ -22,8 +23,8 @@
 
 class RegX8664 {
 public:
-  // An enum of every register. The enum value may not match the encoding
-  // used to binary encode register operands in instructions.
+  /// An enum of every register. The enum value may not match the encoding
+  /// used to binary encode register operands in instructions.
   enum AllRegisters {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -36,8 +37,8 @@
 #undef X
   };
 
-  // An enum of GPR Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of GPR Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum GPRRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -47,8 +48,8 @@
         Encoded_Not_GPR = -1
   };
 
-  // An enum of XMM Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of XMM Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum XmmRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -58,8 +59,8 @@
         Encoded_Not_Xmm = -1
   };
 
-  // An enum of Byte Registers. The enum value does match the encoding used
-  // to binary encode register operands in instructions.
+  /// An enum of Byte Registers. The enum value does match the encoding used
+  /// to binary encode register operands in instructions.
   enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
     REGX8664_BYTEREG_TABLE
diff --git a/src/IceTLS.h b/src/IceTLS.h
index ee1fe24..6837668 100644
--- a/src/IceTLS.h
+++ b/src/IceTLS.h
@@ -6,12 +6,13 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines macros for working around the lack of support for
-// thread_local in MacOS 10.6.  It assumes std::thread is written in
-// terms of pthread.  Define ICE_THREAD_LOCAL_HACK to enable the
-// pthread workarounds.
-//
+///
+/// \file
+/// This file defines macros for working around the lack of support for
+/// thread_local in MacOS 10.6.  It assumes std::thread is written in
+/// terms of pthread.  Define ICE_THREAD_LOCAL_HACK to enable the
+/// pthread workarounds.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETLS_H
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 33a914d..671bab0 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -6,13 +6,14 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the skeleton of the TargetLowering class,
-// specifically invoking the appropriate lowering method for a given
-// instruction kind and driving global register allocation.  It also
-// implements the non-deleted instruction iteration in
-// LoweringContext.
-//
+///
+/// \file
+/// This file implements the skeleton of the TargetLowering class,
+/// specifically invoking the appropriate lowering method for a given
+/// instruction kind and driving global register allocation.  It also
+/// implements the non-deleted instruction iteration in
+/// LoweringContext.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTargetLowering.h"
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index b543208..64672c4 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -6,16 +6,17 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLowering, LoweringContext, and
-// TargetDataLowering classes.  TargetLowering is an abstract class
-// used to drive the translation/lowering process.  LoweringContext
-// maintains a context for lowering each instruction, offering
-// conveniences such as iterating over non-deleted instructions.
-// TargetDataLowering is an abstract class used to drive the
-// lowering/emission of global initializers, external global
-// declarations, and internal constant pools.
-//
+///
+/// \file
+/// This file declares the TargetLowering, LoweringContext, and
+/// TargetDataLowering classes.  TargetLowering is an abstract class
+/// used to drive the translation/lowering process.  LoweringContext
+/// maintains a context for lowering each instruction, offering
+/// conveniences such as iterating over non-deleted instructions.
+/// TargetDataLowering is an abstract class used to drive the
+/// lowering/emission of global initializers, external global
+/// declarations, and internal constant pools.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
@@ -28,12 +29,12 @@
 
 namespace Ice {
 
-// LoweringContext makes it easy to iterate through non-deleted
-// instructions in a node, and insert new (lowered) instructions at
-// the current point.  Along with the instruction list container and
-// associated iterators, it holds the current node, which is needed
-// when inserting new instructions in order to track whether variables
-// are used as single-block or multi-block.
+/// LoweringContext makes it easy to iterate through non-deleted
+/// instructions in a node, and insert new (lowered) instructions at
+/// the current point.  Along with the instruction list container and
+/// associated iterators, it holds the current node, which is needed
+/// when inserting new instructions in order to track whether variables
+/// are used as single-block or multi-block.
 class LoweringContext {
   LoweringContext(const LoweringContext &) = delete;
   LoweringContext &operator=(const LoweringContext &) = delete;
@@ -66,24 +67,24 @@
   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
 
 private:
-  // Node is the argument to Inst::updateVars().
+  /// Node is the argument to Inst::updateVars().
   CfgNode *Node = nullptr;
   Inst *LastInserted = nullptr;
-  // Cur points to the current instruction being considered.  It is
-  // guaranteed to point to a non-deleted instruction, or to be End.
+  /// Cur points to the current instruction being considered.  It is
+  /// guaranteed to point to a non-deleted instruction, or to be End.
   InstList::iterator Cur;
-  // Next doubles as a pointer to the next valid instruction (if any),
-  // and the new-instruction insertion point.  It is also updated for
-  // the caller in case the lowering consumes more than one high-level
-  // instruction.  It is guaranteed to point to a non-deleted
-  // instruction after Cur, or to be End.  TODO: Consider separating
-  // the notion of "next valid instruction" and "new instruction
-  // insertion point", to avoid confusion when previously-deleted
-  // instructions come between the two points.
+  /// Next doubles as a pointer to the next valid instruction (if any),
+  /// and the new-instruction insertion point.  It is also updated for
+  /// the caller in case the lowering consumes more than one high-level
+  /// instruction.  It is guaranteed to point to a non-deleted
+  /// instruction after Cur, or to be End.  TODO: Consider separating
+  /// the notion of "next valid instruction" and "new instruction
+  /// insertion point", to avoid confusion when previously-deleted
+  /// instructions come between the two points.
   InstList::iterator Next;
-  // Begin is a copy of Insts.begin(), used if iterators are moved backward.
+  /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
   InstList::iterator Begin;
-  // End is a copy of Insts.end(), used if Next needs to be advanced.
+  /// End is a copy of Insts.end(), used if Next needs to be advanced.
   InstList::iterator End;
 
   void skipDeleted(InstList::iterator &I) const;
@@ -129,37 +130,37 @@
     Func->setError("Target doesn't specify O2 lowering steps.");
   }
 
-  // Tries to do address mode optimization on a single instruction.
+  /// Tries to do address mode optimization on a single instruction.
   void doAddressOpt();
-  // Randomly insert NOPs.
+  /// Randomly insert NOPs.
   void doNopInsertion();
-  // Lowers a single non-Phi instruction.
+  /// Lowers a single non-Phi instruction.
   void lower();
-  // Does preliminary lowering of the set of Phi instructions in the
-  // current node.  The main intention is to do what's needed to keep
-  // the unlowered Phi instructions consistent with the lowered
-  // non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
-  // target.
+  /// Does preliminary lowering of the set of Phi instructions in the
+  /// current node.  The main intention is to do what's needed to keep
+  /// the unlowered Phi instructions consistent with the lowered
+  /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
+  /// target.
   virtual void prelowerPhis() {}
-  // Lowers a list of "parallel" assignment instructions representing
-  // a topological sort of the Phi instructions.
+  /// Lowers a list of "parallel" assignment instructions representing
+  /// a topological sort of the Phi instructions.
   virtual void lowerPhiAssignments(CfgNode *Node,
                                    const AssignList &Assignments) = 0;
-  // Tries to do branch optimization on a single instruction.  Returns
-  // true if some optimization was done.
+  /// Tries to do branch optimization on a single instruction.  Returns
+  /// true if some optimization was done.
   virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
     return false;
   }
 
   virtual SizeT getNumRegisters() const = 0;
-  // Returns a variable pre-colored to the specified physical
-  // register.  This is generally used to get very direct access to
-  // the register such as in the prolog or epilog or for marking
-  // scratch registers as killed by a call.  If a Type is not
-  // provided, a target-specific default type is used.
+  /// Returns a variable pre-colored to the specified physical
+  /// register.  This is generally used to get very direct access to
+  /// the register such as in the prolog or epilog or for marking
+  /// scratch registers as killed by a call.  If a Type is not
+  /// provided, a target-specific default type is used.
   virtual Variable *getPhysicalRegister(SizeT RegNum,
                                         Type Ty = IceType_void) = 0;
-  // Returns a printable name for the register.
+  /// Returns a printable name for the register.
   virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;
 
   virtual bool hasFramePointer() const { return false; }
@@ -167,8 +168,8 @@
   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
 
   bool hasComputedFrame() const { return HasComputedFrame; }
-  // Returns true if this function calls a function that has the
-  // "returns twice" attribute.
+  /// Returns true if this function calls a function that has the
+  /// "returns twice" attribute.
   bool callsReturnsTwice() const { return CallsReturnsTwice; }
   void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
   int32_t getStackAdjustment() const { return StackAdjustment; }
@@ -196,10 +197,10 @@
       llvm::SmallVectorImpl<int32_t> &Permutation,
       const llvm::SmallBitVector &ExcludeRegisters) const = 0;
 
-  // Save/restore any mutable state for the situation where code
-  // emission needs multiple passes, such as sandboxing or relaxation.
-  // Subclasses may provide their own implementation, but should be
-  // sure to also call the parent class's methods.
+  /// Save/restore any mutable state for the situation where code
+  /// emission needs multiple passes, such as sandboxing or relaxation.
+  /// Subclasses may provide their own implementation, but should be
+  /// sure to also call the parent class's methods.
   virtual void snapshotEmitState() {
     SnapshotStackAdjustment = StackAdjustment;
   }
@@ -219,7 +220,7 @@
   virtual void emit(const ConstantFloat *C) const = 0;
   virtual void emit(const ConstantDouble *C) const = 0;
 
-  // Performs target-specific argument lowering.
+  /// Performs target-specific argument lowering.
   virtual void lowerArguments() = 0;
 
   virtual void initNodeForLowering(CfgNode *) {}
@@ -253,30 +254,30 @@
   virtual void doAddressOptLoad() {}
   virtual void doAddressOptStore() {}
   virtual void randomlyInsertNop(float Probability) = 0;
-  // This gives the target an opportunity to post-process the lowered
-  // expansion before returning.
+  /// This gives the target an opportunity to post-process the lowered
+  /// expansion before returning.
   virtual void postLower() {}
 
-  // Find two-address non-SSA instructions and set the DestNonKillable flag
-  // to keep liveness analysis consistent.
+  /// Find two-address non-SSA instructions and set the DestNonKillable flag
+  /// to keep liveness analysis consistent.
   void inferTwoAddress();
 
-  // Make a pass over the Cfg to determine which variables need stack slots
-  // and place them in a sorted list (SortedSpilledVariables). Among those,
-  // vars, classify the spill variables as local to the basic block vs
-  // global (multi-block) in order to compute the parameters GlobalsSize
-  // and SpillAreaSizeBytes (represents locals or general vars if the
-  // coalescing of locals is disallowed) along with alignments required
-  // for variables in each area. We rely on accurate VMetadata in order to
-  // classify a variable as global vs local (otherwise the variable is
-  // conservatively global). The in-args should be initialized to 0.
-  //
-  // This is only a pre-pass and the actual stack slot assignment is
-  // handled separately.
-  //
-  // There may be target-specific Variable types, which will be handled
-  // by TargetVarHook. If the TargetVarHook returns true, then the variable
-  // is skipped and not considered with the rest of the spilled variables.
+  /// Make a pass over the Cfg to determine which variables need stack slots
+  /// and place them in a sorted list (SortedSpilledVariables). Among those
+  /// vars, classify the spill variables as local to the basic block vs
+  /// global (multi-block) in order to compute the parameters GlobalsSize
+  /// and SpillAreaSizeBytes (represents locals or general vars if the
+  /// coalescing of locals is disallowed) along with alignments required
+  /// for variables in each area. We rely on accurate VMetadata in order to
+  /// classify a variable as global vs local (otherwise the variable is
+  /// conservatively global). The in-args should be initialized to 0.
+  ///
+  /// This is only a pre-pass and the actual stack slot assignment is
+  /// handled separately.
+  ///
+  /// There may be target-specific Variable types, which will be handled
+  /// by TargetVarHook. If the TargetVarHook returns true, then the variable
+  /// is skipped and not considered with the rest of the spilled variables.
   void getVarStackSlotParams(VarList &SortedSpilledVariables,
                              llvm::SmallBitVector &RegsUsed,
                              size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
@@ -284,9 +285,9 @@
                              uint32_t *LocalsSlotsAlignmentBytes,
                              std::function<bool(Variable *)> TargetVarHook);
 
-  // Calculate the amount of padding needed to align the local and global
-  // areas to the required alignment.  This assumes the globals/locals layout
-  // used by getVarStackSlotParams and assignVarStackSlots.
+  /// Calculate the amount of padding needed to align the local and global
+  /// areas to the required alignment.  This assumes the globals/locals layout
+  /// used by getVarStackSlotParams and assignVarStackSlots.
   void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                             uint32_t SpillAreaAlignmentBytes,
                             size_t GlobalsSize,
@@ -294,24 +295,24 @@
                             uint32_t *SpillAreaPaddingBytes,
                             uint32_t *LocalsSlotsPaddingBytes);
 
-  // Make a pass through the SortedSpilledVariables and actually assign
-  // stack slots. SpillAreaPaddingBytes takes into account stack alignment
-  // padding. The SpillArea starts after that amount of padding.
-  // This matches the scheme in getVarStackSlotParams, where there may
-  // be a separate multi-block global var spill area and a local var
-  // spill area.
+  /// Make a pass through the SortedSpilledVariables and actually assign
+  /// stack slots. SpillAreaPaddingBytes takes into account stack alignment
+  /// padding. The SpillArea starts after that amount of padding.
+  /// This matches the scheme in getVarStackSlotParams, where there may
+  /// be a separate multi-block global var spill area and a local var
+  /// spill area.
   void assignVarStackSlots(VarList &SortedSpilledVariables,
                            size_t SpillAreaPaddingBytes,
                            size_t SpillAreaSizeBytes,
                            size_t GlobalsAndSubsequentPaddingSize,
                            bool UsesFramePointer);
 
-  // Sort the variables in Source based on required alignment.
-  // The variables with the largest alignment need are placed in the front
-  // of the Dest list.
+  /// Sort the variables in Source based on required alignment.
+  /// The variables with the largest alignment need are placed in the front
+  /// of the Dest list.
   void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;
 
-  // Make a call to an external helper function.
+  /// Make a call to an external helper function.
   InstCall *makeHelperCall(const IceString &Name, Variable *Dest,
                            SizeT MaxSrcs);
 
@@ -325,8 +326,8 @@
   GlobalContext *Ctx;
   bool HasComputedFrame = false;
   bool CallsReturnsTwice = false;
-  // StackAdjustment keeps track of the current stack offset from its
-  // natural location, as arguments are pushed for a function call.
+  /// StackAdjustment keeps track of the current stack offset from its
+  /// natural location, as arguments are pushed for a function call.
   int32_t StackAdjustment = 0;
   SizeT NextLabelNumber = 0;
   LoweringContext Context;
@@ -373,9 +374,9 @@
   int32_t SnapshotStackAdjustment = 0;
 };
 
-// TargetDataLowering is used for "lowering" data including initializers
-// for global variables, and the internal constant pools.  It is separated
-// out from TargetLowering because it does not require a Cfg.
+/// TargetDataLowering is used for "lowering" data including initializers
+/// for global variables, and the internal constant pools.  It is separated
+/// out from TargetLowering because it does not require a Cfg.
 class TargetDataLowering {
   TargetDataLowering() = delete;
   TargetDataLowering(const TargetDataLowering &) = delete;
@@ -393,18 +394,18 @@
   void emitGlobal(const VariableDeclaration &Var,
                   const IceString &SectionSuffix);
 
-  // For now, we assume .long is the right directive for emitting 4 byte
-  // emit global relocations. However, LLVM MIPS usually uses .4byte instead.
-  // Perhaps there is some difference when the location is unaligned.
+  /// For now, we assume .long is the right directive for emitting 4-byte
+  /// global relocations. However, LLVM MIPS usually uses .4byte instead.
+  /// Perhaps there is some difference when the location is unaligned.
   static const char *getEmit32Directive() { return ".long"; }
 
   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
   GlobalContext *Ctx;
 };
 
-// TargetHeaderLowering is used to "lower" the header of an output file.
-// It writes out the target-specific header attributes. E.g., for ARM
-// this writes out the build attributes (float ABI, etc.).
+/// TargetHeaderLowering is used to "lower" the header of an output file.
+/// It writes out the target-specific header attributes. E.g., for ARM
+/// this writes out the build attributes (float ABI, etc.).
 class TargetHeaderLowering {
   TargetHeaderLowering() = delete;
   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 85294b0..6639da8 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the TargetLoweringARM32 class, which consists almost
-// entirely of the lowering sequence for each high-level instruction.
-//
+///
+/// \file
+/// This file implements the TargetLoweringARM32 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTargetLoweringARM32.h"
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 02d9080..8aa3e11 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLoweringARM32 class, which implements the
-// TargetLowering interface for the ARM 32-bit architecture.
-//
+///
+/// \file
+/// This file declares the TargetLoweringARM32 class, which implements the
+/// TargetLowering interface for the ARM 32-bit architecture.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
@@ -90,9 +91,9 @@
   void addProlog(CfgNode *Node) override;
   void addEpilog(CfgNode *Node) override;
 
-  // Ensure that a 64-bit Variable has been split into 2 32-bit
-  // Variables, creating them if necessary.  This is needed for all
-  // I64 operations.
+  /// Ensure that a 64-bit Variable has been split into 2 32-bit
+  /// Variables, creating them if necessary.  This is needed for all
+  /// I64 operations.
   void split64(Variable *Var);
   Operand *loOperand(Operand *Operand);
   Operand *hiOperand(Operand *Operand);
@@ -135,10 +136,10 @@
 
   enum OperandLegalization {
     Legal_None = 0,
-    Legal_Reg = 1 << 0,  // physical register, not stack location
-    Legal_Flex = 1 << 1, // A flexible operand2, which can hold rotated
-                         // small immediates, or shifted registers.
-    Legal_Mem = 1 << 2,  // includes [r0, r1 lsl #2] as well as [sp, #12]
+    Legal_Reg = 1 << 0,  ///< physical register, not stack location
+    Legal_Flex = 1 << 1, ///< A flexible operand2, which can hold rotated
+                         ///< small immediates, or shifted registers.
+    Legal_Mem = 1 << 2,  ///< includes [r0, r1 lsl #2] as well as [sp, #12]
     Legal_All = ~Legal_None
   };
   typedef uint32_t LegalMask;
@@ -152,7 +153,7 @@
   Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
   void alignRegisterPow2(Variable *Reg, uint32_t Align);
 
-  // Returns a vector in a register with the given constant entries.
+  /// Returns a vector in a register with the given constant entries.
   Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
 
   void makeRandomRegisterPermutation(
@@ -247,9 +248,9 @@
             CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
   }
-  // If Dest=nullptr is passed in, then a new variable is created,
-  // marked as infinite register allocation weight, and returned
-  // through the in/out Dest argument.
+  /// If Dest=nullptr is passed in, then a new variable is created,
+  /// marked as infinite register allocation weight, and returned
+  /// through the in/out Dest argument.
   void _mov(Variable *&Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL,
             int32_t RegNum = Variable::NoRegister) {
@@ -263,8 +264,8 @@
     NewInst->setDestNonKillable();
     Context.insert(NewInst);
   }
-  // The Operand can only be a 16-bit immediate or a ConstantRelocatable
-  // (with an upper16 relocation).
+  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
+  /// (with an upper16 relocation).
   void _movt(Variable *Dest, Operand *Src0,
              CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred));
@@ -372,16 +373,16 @@
   VarList PhysicalRegisters[IceType_NUM];
   static IceString RegNames[];
 
-  // Helper class that understands the Calling Convention and register
-  // assignments. The first few integer type parameters can use r0-r3,
-  // regardless of their position relative to the floating-point/vector
-  // arguments in the argument list. Floating-point and vector arguments
-  // can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
-  // start with registers but extend beyond the available registers can be
-  // split between the registers and the stack. However, this is typically
-  // for passing GPR structs by value, and PNaCl transforms expand this out.
-  //
-  // Also, at the point before the call, the stack must be aligned.
+  /// Helper class that understands the Calling Convention and register
+  /// assignments. The first few integer type parameters can use r0-r3,
+  /// regardless of their position relative to the floating-point/vector
+  /// arguments in the argument list. Floating-point and vector arguments
+  /// can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
+  /// start with registers but extend beyond the available registers can be
+  /// split between the registers and the stack. However, this is typically
+  /// for passing GPR structs by value, and PNaCl transforms expand this out.
+  ///
+  /// Also, at the point before the call, the stack must be aligned.
   class CallingConv {
     CallingConv(const CallingConv &) = delete;
     CallingConv &operator=(const CallingConv &) = delete;
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 0adff03..6c0793f 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the TargetLoweringMIPS32 class, which consists almost
-// entirely of the lowering sequence for each high-level instruction.
-//
+///
+/// \file
+/// This file implements the TargetLoweringMIPS32 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTargetLoweringMIPS32.h"
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index eb4a9fd..f872af0 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLoweringMIPS32 class, which implements the
-// TargetLowering interface for the MIPS 32-bit architecture.
-//
+///
+/// \file
+/// This file declares the TargetLoweringMIPS32 class, which implements the
+/// TargetLowering interface for the MIPS 32-bit architecture.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGMIPS32_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index e33eeef..c6a72d6 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the TargetLoweringX8632 class, which
-// consists almost entirely of the lowering sequence for each
-// high-level instruction.
-//
+///
+/// \file
+/// This file implements the TargetLoweringX8632 class, which
+/// consists almost entirely of the lowering sequence for each
+/// high-level instruction.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTargetLoweringX8632.h"
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index f24275d..f1d8bb3 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLoweringX8632 class, which
-// implements the TargetLowering interface for the x86-32
-// architecture.
-//
+///
+/// \file
+/// This file declares the TargetLoweringX8632 class, which
+/// implements the TargetLowering interface for the x86-32
+/// architecture.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632_H
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 21be080..f2be0c1 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// Implements the Target Lowering for x86-64.
-//
+///
+/// \file
+/// Implements the Target Lowering for x86-64.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceDefs.h"
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 0107cfc..791343a 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLoweringX8664 class, which implements the
-// TargetLowering interface for the X86 64-bit architecture.
-//
+///
+/// \file
+/// This file declares the TargetLoweringX8664 class, which implements the
+/// TargetLowering interface for the X86 64-bit architecture.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8664_H
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 46c53f8..37a1107 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TargetLoweringX86 template class, which
-// implements the TargetLowering base interface for the x86
-// architecture.
-//
+///
+/// \file
+/// This file declares the TargetLoweringX86 template class, which
+/// implements the TargetLowering base interface for the x86
+/// architecture.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASE_H
@@ -141,11 +142,11 @@
   void initNodeForLowering(CfgNode *Node) override;
   void addProlog(CfgNode *Node) override;
   void addEpilog(CfgNode *Node) override;
-  // Ensure that a 64-bit Variable has been split into 2 32-bit
-  // Variables, creating them if necessary.  This is needed for all
-  // I64 operations, and it is needed for pushing F64 arguments for
-  // function calls using the 32-bit push instruction (though the
-  // latter could be done by directly writing to the stack).
+  /// Ensure that a 64-bit Variable has been split into 2 32-bit
+  /// Variables, creating them if necessary.  This is needed for all
+  /// I64 operations, and it is needed for pushing F64 arguments for
+  /// function calls using the 32-bit push instruction (though the
+  /// latter could be done by directly writing to the stack).
   void split64(Variable *Var);
   Operand *loOperand(Operand *Operand);
   Operand *hiOperand(Operand *Operand);
@@ -190,10 +191,10 @@
   void doAddressOptStore() override;
   void randomlyInsertNop(float Probability) override;
 
-  // Naive lowering of cmpxchg.
+  /// Naive lowering of cmpxchg.
   void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
                           Operand *Desired);
-  // Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
+  /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized.
   bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr,
                                 Operand *Expected, Operand *Desired);
   void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
@@ -210,13 +211,13 @@
   void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                            Operand *Src0, Operand *Src1);
 
-  // Operand legalization helpers.  To deal with address mode
-  // constraints, the helpers will create a new Operand and emit
-  // instructions that guarantee that the Operand kind is one of those
-  // indicated by the LegalMask (a bitmask of allowed kinds).  If the
-  // input Operand is known to already meet the constraints, it may be
-  // simply returned as the result, without creating any new
-  // instructions or operands.
+  /// Operand legalization helpers.  To deal with address mode
+  /// constraints, the helpers will create a new Operand and emit
+  /// instructions that guarantee that the Operand kind is one of those
+  /// indicated by the LegalMask (a bitmask of allowed kinds).  If the
+  /// input Operand is known to already meet the constraints, it may be
+  /// simply returned as the result, without creating any new
+  /// instructions or operands.
   enum OperandLegalization {
     Legal_None = 0,
     Legal_Reg = 1 << 0, // physical register, not stack location
@@ -228,11 +229,11 @@
   Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
                     int32_t RegNum = Variable::NoRegister);
   Variable *legalizeToVar(Operand *From, int32_t RegNum = Variable::NoRegister);
-  // Legalize the first source operand for use in the cmp instruction.
+  /// Legalize the first source operand for use in the cmp instruction.
   Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
-  // Turn a pointer operand into a memory operand that can be
-  // used by a real load/store operation. Legalizes the operand as well.
-  // This is a nop if the operand is already a legal memory operand.
+  /// Turn a pointer operand into a memory operand that can be
+  /// used by a real load/store operation. Legalizes the operand as well.
+  /// This is a nop if the operand is already a legal memory operand.
   OperandX8632Mem *formMemoryOperand(Operand *Ptr, Type Ty,
                                      bool DoLegalize = true);
 
@@ -241,7 +242,7 @@
 
   Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
 
-  // Returns a vector in a register with the given constant entries.
+  /// Returns a vector in a register with the given constant entries.
   Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
   Variable *makeVectorOfOnes(Type Ty, int32_t RegNum = Variable::NoRegister);
   Variable *makeVectorOfMinusOnes(Type Ty,
@@ -251,7 +252,7 @@
   Variable *makeVectorOfFabsMask(Type Ty,
                                  int32_t RegNum = Variable::NoRegister);
 
-  // Return a memory operand corresponding to a stack allocated Variable.
+  /// Return a memory operand corresponding to a stack allocated Variable.
   OperandX8632Mem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                 uint32_t Offset = 0);
 
@@ -260,9 +261,9 @@
       const llvm::SmallBitVector &ExcludeRegisters) const override;
 
   // TODO(jpp): move the helper methods below to the MachineTraits.
-  // The following are helpers that insert lowered x86 instructions
-  // with minimal syntactic overhead, so that the lowering code can
-  // look as close to assembly as practical.
+  /// The following are helpers that insert lowered x86 instructions
+  /// with minimal syntactic overhead, so that the lowering code can
+  /// look as close to assembly as practical.
   void _adc(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Adc::create(Func, Dest, Src0));
   }
@@ -385,9 +386,9 @@
     Context.insert(InstX8632Lea::create(Func, Dest, Src0));
   }
   void _mfence() { Context.insert(InstX8632Mfence::create(Func)); }
-  // If Dest=nullptr is passed in, then a new variable is created,
-  // marked as infinite register allocation weight, and returned
-  // through the in/out Dest argument.
+  /// If Dest=nullptr is passed in, then a new variable is created,
+  /// marked as infinite register allocation weight, and returned
+  /// through the in/out Dest argument.
   void _mov(Variable *&Dest, Operand *Src0,
             int32_t RegNum = Variable::NoRegister) {
     if (Dest == nullptr)
@@ -601,7 +602,7 @@
   VarList PhysicalRegisters[IceType_NUM];
   static IceString RegNames[];
 
-  // Randomize a given immediate operand
+  /// Randomize a given immediate operand
   Operand *randomizeOrPoolImmediate(Constant *Immediate,
                                     int32_t RegNum = Variable::NoRegister);
   OperandX8632Mem *
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 9ebeb62..6a0f3b3 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the TargetLoweringX86Base class, which
-// consists almost entirely of the lowering sequence for each
-// high-level instruction.
-//
+///
+/// \file
+/// This file implements the TargetLoweringX86Base class, which
+/// consists almost entirely of the lowering sequence for each
+/// high-level instruction.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
@@ -34,8 +35,8 @@
 namespace Ice {
 namespace X86Internal {
 
-// A helper class to ease the settings of RandomizationPoolingPause
-// to disable constant blinding or pooling for some translation phases.
+/// A helper class to ease the settings of RandomizationPoolingPause
+/// to disable constant blinding or pooling for some translation phases.
 class BoolFlagSaver {
   BoolFlagSaver() = delete;
   BoolFlagSaver(const BoolFlagSaver &) = delete;
@@ -57,15 +58,15 @@
   BoolFoldingEntry() = default;
   explicit BoolFoldingEntry(Inst *I);
   BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
-  // Instr is the instruction producing the i1-type variable of interest.
+  /// Instr is the instruction producing the i1-type variable of interest.
   Inst *Instr = nullptr;
-  // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
+  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
   bool IsComplex = false;
-  // IsLiveOut is initialized conservatively to true, and is set to false when
-  // we encounter an instruction that ends Var's live range.  We disable the
-  // folding optimization when Var is live beyond this basic block.  Note that
-  // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
-  // always be true and the folding optimization will never be performed.
+  /// IsLiveOut is initialized conservatively to true, and is set to false when
+  /// we encounter an instruction that ends Var's live range.  We disable the
+  /// folding optimization when Var is live beyond this basic block.  Note that
+  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
+  /// always be true and the folding optimization will never be performed.
   bool IsLiveOut = true;
   // NumUses counts the number of times Var is used as a source operand in the
   // basic block.  If IsComplex is true and there is more than one use of Var,
@@ -83,10 +84,10 @@
     PK_Trunc
   };
 
-  // Currently the actual enum values are not used (other than CK_None), but we
-  // go
-  // ahead and produce them anyway for symmetry with the
-  // BoolFoldingProducerKind.
+  /// Currently the actual enum values are not used (other than CK_None), but we
+  /// go
+  /// ahead and produce them anyway for symmetry with the
+  /// BoolFoldingProducerKind.
   enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
 
 private:
@@ -103,13 +104,13 @@
   void dump(const Cfg *Func) const;
 
 private:
-  // Returns true if Producers contains a valid entry for the given VarNum.
+  /// Returns true if Producers contains a valid entry for the given VarNum.
   bool containsValid(SizeT VarNum) const {
     auto Element = Producers.find(VarNum);
     return Element != Producers.end() && Element->second.Instr != nullptr;
   }
   void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
-  // Producers maps Variable::Number to a BoolFoldingEntry.
+  /// Producers maps Variable::Number to a BoolFoldingEntry.
   std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
 };
 
@@ -162,12 +163,12 @@
   return CK_None;
 }
 
-// Returns true if the producing instruction has a "complex" lowering
-// sequence.  This generally means that its lowering sequence requires
-// more than one conditional branch, namely 64-bit integer compares
-// and some floating-point compares.  When this is true, and there is
-// more than one consumer, we prefer to disable the folding
-// optimization because it minimizes branches.
+/// Returns true if the producing instruction has a "complex" lowering
+/// sequence.  This generally means that its lowering sequence requires
+/// more than one conditional branch, namely 64-bit integer compares
+/// and some floating-point compares.  When this is true, and there is
+/// more than one consumer, we prefer to disable the folding
+/// optimization because it minimizes branches.
 template <class MachineTraits>
 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
   switch (getProducerKind(Instr)) {
@@ -610,11 +611,11 @@
   return Intrinsics::MemoryOrderInvalid;
 }
 
-// Determines whether the dest of a Load instruction can be folded
-// into one of the src operands of a 2-operand instruction.  This is
-// true as long as the load dest matches exactly one of the binary
-// instruction's src operands.  Replaces Src0 or Src1 with LoadSrc if
-// the answer is true.
+/// Determines whether the dest of a Load instruction can be folded
+/// into one of the src operands of a 2-operand instruction.  This is
+/// true as long as the load dest matches exactly one of the binary
+/// instruction's src operands.  Replaces Src0 or Src1 with LoadSrc if
+/// the answer is true.
 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                Operand *&Src0, Operand *&Src1) {
   if (Src0 == LoadDest && Src1 != LoadDest) {
@@ -852,15 +853,15 @@
   }
 }
 
-// Helper function for addProlog().
-//
-// This assumes Arg is an argument passed on the stack.  This sets the
-// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-// width.  For an I64 arg that has been split into Lo and Hi components,
-// it calls itself recursively on the components, taking care to handle
-// Lo first because of the little-endian architecture.  Lastly, this
-// function generates an instruction to copy Arg into its assigned
-// register if applicable.
+/// Helper function for addProlog().
+///
+/// This assumes Arg is an argument passed on the stack.  This sets the
+/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
+/// width.  For an I64 arg that has been split into Lo and Hi components,
+/// it calls itself recursively on the components, taking care to handle
+/// Lo first because of the little-endian architecture.  Lastly, this
+/// function generates an instruction to copy Arg into its assigned
+/// register if applicable.
 template <class Machine>
 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                     Variable *FramePtr,
@@ -1347,12 +1348,12 @@
   _mov(Dest, esp);
 }
 
-// Strength-reduce scalar integer multiplication by a constant (for
-// i32 or narrower) for certain constants.  The lea instruction can be
-// used to multiply by 3, 5, or 9, and the lsh instruction can be used
-// to multiply by powers of 2.  These can be combined such that
-// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
-// combined with left-shifting by 2.
+/// Strength-reduce scalar integer multiplication by a constant (for
+/// i32 or narrower) for certain constants.  The lea instruction can be
+/// used to multiply by 3, 5, or 9, and the lsh instruction can be used
+/// to multiply by powers of 2.  These can be combined such that
+/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
+/// combined with left-shifting by 2.
 template <class Machine>
 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                                int32_t Src1) {
@@ -2391,7 +2392,7 @@
         _pcmpgt(T, Zeros);
         _movp(Dest, T);
       } else {
-        // width = width(elty) - 1; dest = (src << width) >> width
+        // width = width(elty) - 1; dest = (src << width) >> width
         SizeT ShiftAmount =
             Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
             1;
@@ -3939,10 +3940,10 @@
   _mov(Dest, T_eax);
 }
 
-// Lowers count {trailing, leading} zeros intrinsic.
-//
-// We could do constant folding here, but that should have
-// been done by the front-end/middle-end optimizations.
+/// Lowers count {trailing, leading} zeros intrinsic.
+///
+/// We could do constant folding here, but that should have
+/// been done by the front-end/middle-end optimizations.
 template <class Machine>
 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                              Operand *FirstVal,
@@ -4608,14 +4609,14 @@
   lowerAssign(InstAssign::create(Func, Dest, T));
 }
 
-// The following pattern occurs often in lowered C and C++ code:
-//
-//   %cmp     = fcmp/icmp pred <n x ty> %src0, %src1
-//   %cmp.ext = sext <n x i1> %cmp to <n x ty>
-//
-// We can eliminate the sext operation by copying the result of pcmpeqd,
-// pcmpgtd, or cmpps (which produce sign extended results) to the result
-// of the sext operation.
+/// The following pattern occurs often in lowered C and C++ code:
+///
+///   %cmp     = fcmp/icmp pred <n x ty> %src0, %src1
+///   %cmp.ext = sext <n x i1> %cmp to <n x ty>
+///
+/// We can eliminate the sext operation by copying the result of pcmpeqd,
+/// pcmpgtd, or cmpps (which produce sign extended results) to the result
+/// of the sext operation.
 template <class Machine>
 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
     Variable *SignExtendedResult) {
@@ -4721,10 +4722,10 @@
   }
 }
 
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis.  Undef values are also
-// turned into zeroes, since loOperand() and hiOperand() don't expect
-// Undef input.
+/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
+/// preserve integrity of liveness analysis.  Undef values are also
+/// turned into zeroes, since loOperand() and hiOperand() don't expect
+/// Undef input.
 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
   // Pause constant blinding or pooling, blinding or pooling will be done later
   // during phi lowering assignments
@@ -4770,8 +4771,8 @@
   return true;
 }
 
-// Lower the pre-ordered list of assignments into mov instructions.
-// Also has to do some ad-hoc register allocation as necessary.
+/// Lower the pre-ordered list of assignments into mov instructions.
+/// Also has to do some ad-hoc register allocation as necessary.
 template <class Machine>
 void TargetX86Base<Machine>::lowerPhiAssignments(
     CfgNode *Node, const AssignList &Assignments) {
@@ -4975,12 +4976,12 @@
   }
 }
 
-// Construct a mask in a register that can be and'ed with a
-// floating-point value to mask off its sign bit.  The value will be
-// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
-// for f64.  Construct it as vector of ones logically right shifted
-// one bit.  TODO(stichnot): Fix the wala TODO above, to represent
-// vector constants in memory.
+/// Construct a mask in a register that can be and'ed with a
+/// floating-point value to mask off its sign bit.  The value will be
+/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
+/// for f64.  Construct it as vector of ones logically right shifted
+/// one bit.  TODO(stichnot): Fix the wala TODO above, to represent
+/// vector constants in memory.
 template <class Machine>
 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                        int32_t RegNum) {
@@ -5007,8 +5008,8 @@
   return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
 }
 
-// Helper for legalize() to emit the right code to lower an operand to a
-// register of the appropriate type.
+/// Helper for legalize() to emit the right code to lower an operand to a
+/// register of the appropriate type.
 template <class Machine>
 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
   Type Ty = Src->getType();
@@ -5135,18 +5136,18 @@
   return From;
 }
 
-// Provide a trivial wrapper to legalize() for this common usage.
+/// Provide a trivial wrapper to legalize() for this common usage.
 template <class Machine>
 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
 }
 
-// For the cmp instruction, if Src1 is an immediate, or known to be a
-// physical register, we can allow Src0 to be a memory operand.
-// Otherwise, Src0 must be copied into a physical register.
-// (Actually, either Src0 or Src1 can be chosen for the physical
-// register, but unfortunately we have to commit to one or the other
-// before register allocation.)
+/// For the cmp instruction, if Src1 is an immediate, or known to be a
+/// physical register, we can allow Src0 to be a memory operand.
+/// Otherwise, Src0 must be copied into a physical register.
+/// (Actually, either Src0 or Src1 can be chosen for the physical
+/// register, but unfortunately we have to commit to one or the other
+/// before register allocation.)
 template <class Machine>
 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                     Operand *Src1) {
@@ -5315,7 +5316,7 @@
   llvm::report_fatal_error("undef value encountered by emitter.");
 }
 
-// Randomize or pool an Immediate.
+/// Randomize or pool an Immediate.
 template <class Machine>
 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
                                                           int32_t RegNum) {
diff --git a/src/IceThreading.cpp b/src/IceThreading.cpp
index 9720f77..153abff 100644
--- a/src/IceThreading.cpp
+++ b/src/IceThreading.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines threading-related functions.
-//
+///
+/// \file
+/// This file defines threading-related functions.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceThreading.h"
diff --git a/src/IceThreading.h b/src/IceThreading.h
index 4f04935..f59f46e 100644
--- a/src/IceThreading.h
+++ b/src/IceThreading.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares threading-related functions.
-//
+///
+/// \file
+/// This file declares threading-related functions.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETHREADING_H
@@ -21,31 +22,31 @@
 
 namespace Ice {
 
-// BoundedProducerConsumerQueue is a work queue that allows multiple
-// producers and multiple consumers.  A producer adds entries using
-// blockingPush(), and may block if the queue is "full".  A producer
-// uses notifyEnd() to indicate that no more entries will be added.  A
-// consumer removes an item using blockingPop(), which will return
-// nullptr if notifyEnd() has been called and the queue is empty (it
-// never returns nullptr if the queue contained any items).
-//
-// The MaxSize ctor arg controls the maximum size the queue can grow
-// to (subject to a hard limit of MaxStaticSize-1).  The Sequential
-// arg indicates purely sequential execution in which the single
-// thread should never wait().
-//
-// Two condition variables are used in the implementation.
-// GrewOrEnded signals a waiting worker that a producer has changed
-// the state of the queue.  Shrunk signals a blocked producer that a
-// consumer has changed the state of the queue.
-//
-// The methods begin with Sequential-specific code to be most clear.
-// The lock and condition variables are not used in the Sequential
-// case.
-//
-// Internally, the queue is implemented as a circular array of size
-// MaxStaticSize, where the queue boundaries are denoted by the Front
-// and Back fields.  Front==Back indicates an empty queue.
+/// BoundedProducerConsumerQueue is a work queue that allows multiple
+/// producers and multiple consumers.  A producer adds entries using
+/// blockingPush(), and may block if the queue is "full".  A producer
+/// uses notifyEnd() to indicate that no more entries will be added.  A
+/// consumer removes an item using blockingPop(), which will return
+/// nullptr if notifyEnd() has been called and the queue is empty (it
+/// never returns nullptr if the queue contained any items).
+///
+/// The MaxSize ctor arg controls the maximum size the queue can grow
+/// to (subject to a hard limit of MaxStaticSize-1).  The Sequential
+/// arg indicates purely sequential execution in which the single
+/// thread should never wait().
+///
+/// Two condition variables are used in the implementation.
+/// GrewOrEnded signals a waiting worker that a producer has changed
+/// the state of the queue.  Shrunk signals a blocked producer that a
+/// consumer has changed the state of the queue.
+///
+/// The methods begin with Sequential-specific code to be most clear.
+/// The lock and condition variables are not used in the Sequential
+/// case.
+///
+/// Internally, the queue is implemented as a circular array of size
+/// MaxStaticSize, where the queue boundaries are denoted by the Front
+/// and Back fields.  Front==Back indicates an empty queue.
 template <typename T, size_t MaxStaticSize = 128>
 class BoundedProducerConsumerQueue {
   BoundedProducerConsumerQueue() = delete;
@@ -94,47 +95,47 @@
   static_assert(!(MaxStaticSize & (MaxStaticSize - 1)),
                 "MaxStaticSize must be a power of 2");
 
-  // WorkItems and Lock are read/written by all.
   ICE_CACHELINE_BOUNDARY;
+  /// WorkItems and Lock are read/written by all.
   T *WorkItems[MaxStaticSize];
   ICE_CACHELINE_BOUNDARY;
-  // Lock guards access to WorkItems, Front, Back, and IsEnded.
+  /// Lock guards access to WorkItems, Front, Back, and IsEnded.
   GlobalLockType Lock;
 
   ICE_CACHELINE_BOUNDARY;
-  // GrewOrEnded is written by the producers and read by the
-  // consumers.  It is notified (by the producer) when something is
-  // added to the queue, in case consumers are waiting for a non-empty
-  // queue.
+  /// GrewOrEnded is written by the producers and read by the
+  /// consumers.  It is notified (by the producer) when something is
+  /// added to the queue, in case consumers are waiting for a non-empty
+  /// queue.
   std::condition_variable GrewOrEnded;
-  // Back is the index into WorkItems[] of where the next element will
-  // be pushed.  (More precisely, Back&MaxStaticSize is the index.)
-  // It is written by the producers, and read by all via size() and
-  // empty().
+  /// Back is the index into WorkItems[] of where the next element will
+  /// be pushed.  (More precisely, Back&(MaxStaticSize-1) is the index.)
+  /// It is written by the producers, and read by all via size() and
+  /// empty().
   size_t Back = 0;
 
   ICE_CACHELINE_BOUNDARY;
-  // Shrunk is notified (by the consumer) when something is removed
-  // from the queue, in case a producer is waiting for the queue to
-  // drop below maximum capacity.  It is written by the consumers and
-  // read by the producers.
+  /// Shrunk is notified (by the consumer) when something is removed
+  /// from the queue, in case a producer is waiting for the queue to
+  /// drop below maximum capacity.  It is written by the consumers and
+  /// read by the producers.
   std::condition_variable Shrunk;
-  // Front is the index into WorkItems[] of the oldest element,
-  // i.e. the next to be popped.  (More precisely Front&MaxStaticSize
-  // is the index.)  It is written by the consumers, and read by all
-  // via size() and empty().
+  /// Front is the index into WorkItems[] of the oldest element,
+  /// i.e. the next to be popped.  (More precisely Front&(MaxStaticSize-1)
+  /// is the index.)  It is written by the consumers, and read by all
+  /// via size() and empty().
   size_t Front = 0;
 
   ICE_CACHELINE_BOUNDARY;
 
-  // MaxSize and Sequential are read by all and written by none.
+  /// MaxSize and Sequential are read by all and written by none.
   const size_t MaxSize;
   const bool Sequential;
-  // IsEnded is read by the consumers, and only written once by the
-  // producer.
+  /// IsEnded is read by the consumers, and only written once by the
+  /// producer.
   bool IsEnded = false;
 
-  // The lock must be held when the following methods are called.
+  /// The lock must be held when the following methods are called.
   bool empty() const { return Front == Back; }
   size_t size() const { return Back - Front; }
   void push(T *Item) {
@@ -147,45 +148,45 @@
   }
 };
 
-// EmitterWorkItem is a simple wrapper around a pointer that
-// represents a work item to be emitted, i.e. a function or a set of
-// global declarations and initializers, and it includes a sequence
-// number so that work items can be emitted in a particular order for
-// deterministic output.  It acts like an interface class, but instead
-// of making the classes of interest inherit from EmitterWorkItem, it
-// wraps pointers to these classes.  Some space is wasted compared to
-// storing the pointers in a union, but not too much due to the work
-// granularity.
+/// EmitterWorkItem is a simple wrapper around a pointer that
+/// represents a work item to be emitted, i.e. a function or a set of
+/// global declarations and initializers, and it includes a sequence
+/// number so that work items can be emitted in a particular order for
+/// deterministic output.  It acts like an interface class, but instead
+/// of making the classes of interest inherit from EmitterWorkItem, it
+/// wraps pointers to these classes.  Some space is wasted compared to
+/// storing the pointers in a union, but not too much due to the work
+/// granularity.
 class EmitterWorkItem {
   EmitterWorkItem() = delete;
   EmitterWorkItem(const EmitterWorkItem &) = delete;
   EmitterWorkItem &operator=(const EmitterWorkItem &) = delete;
 
 public:
-  // ItemKind can be one of the following:
-  //
-  // WI_Nop: No actual work.  This is a placeholder to maintain
-  // sequence numbers in case there is a translation error.
-  //
-  // WI_GlobalInits: A list of global declarations and initializers.
-  //
-  // WI_Asm: A function that has already had emitIAS() called on it.
-  // The work is transferred via the Assembler buffer, and the
-  // originating Cfg has been deleted (to recover lots of memory).
-  //
-  // WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on
-  // it.  This is only used as a debugging configuration when we want
-  // to emit "readable" assembly code, possibly annotated with
-  // liveness and other information only available in the Cfg and not
-  // in the Assembler buffer.
+  /// ItemKind can be one of the following:
+  ///
+  /// WI_Nop: No actual work.  This is a placeholder to maintain
+  /// sequence numbers in case there is a translation error.
+  ///
+  /// WI_GlobalInits: A list of global declarations and initializers.
+  ///
+  /// WI_Asm: A function that has already had emitIAS() called on it.
+  /// The work is transferred via the Assembler buffer, and the
+  /// originating Cfg has been deleted (to recover lots of memory).
+  ///
+  /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on
+  /// it.  This is only used as a debugging configuration when we want
+  /// to emit "readable" assembly code, possibly annotated with
+  /// liveness and other information only available in the Cfg and not
+  /// in the Assembler buffer.
   enum ItemKind { WI_Nop, WI_GlobalInits, WI_Asm, WI_Cfg };
-  // Constructor for a WI_Nop work item.
+  /// Constructor for a WI_Nop work item.
   explicit EmitterWorkItem(uint32_t Seq);
-  // Constructor for a WI_GlobalInits work item.
+  /// Constructor for a WI_GlobalInits work item.
   EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D);
-  // Constructor for a WI_Asm work item.
+  /// Constructor for a WI_Asm work item.
   EmitterWorkItem(uint32_t Seq, Assembler *A);
-  // Constructor for a WI_Cfg work item.
+  /// Constructor for a WI_Cfg work item.
   EmitterWorkItem(uint32_t Seq, Cfg *F);
   uint32_t getSequenceNumber() const { return Sequence; }
   ItemKind getKind() const { return Kind; }
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index 879f06e..818efc3 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the TimerTree class, which tracks flat and
-// cumulative execution time collection of call chains.
-//
+///
+/// \file
+/// This file defines the TimerTree class, which tracks flat and
+/// cumulative execution time collection of call chains.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTimerTree.h"
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h
index dd8dcf1..aabeb29 100644
--- a/src/IceTimerTree.h
+++ b/src/IceTimerTree.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the TimerTree class, which allows flat and
-// cumulative execution time collection of call chains.
-//
+///
+/// \file
+/// This file declares the TimerTree class, which allows flat and
+/// cumulative execution time collection of call chains.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETIMERTREE_H
@@ -25,23 +26,23 @@
   TimerStack() = delete;
   TimerStack &operator=(const TimerStack &) = delete;
 
-  // Timer tree index type.  A variable of this type is used to access
-  // an interior, not-necessarily-leaf node of the tree.
+  /// Timer tree index type.  A variable of this type is used to access
+  /// an interior, not-necessarily-leaf node of the tree.
   typedef std::vector<class TimerTreeNode>::size_type TTindex;
-  // Representation of a path of leaf values leading to a particular
-  // node.  The representation happens to be in "reverse" order,
-  // i.e. from leaf/interior to root, for implementation efficiency.
+  /// Representation of a path of leaf values leading to a particular
+  /// node.  The representation happens to be in "reverse" order,
+  /// i.e. from leaf/interior to root, for implementation efficiency.
   typedef llvm::SmallVector<TTindex, 8> PathType;
-  // Representation of a mapping of leaf node indexes from one timer
-  // stack to another.
+  /// Representation of a mapping of leaf node indexes from one timer
+  /// stack to another.
   typedef std::vector<TimerIdT> TranslationType;
 
-  // TimerTreeNode represents an interior or leaf node in the call tree.
-  // It contains a list of children, a pointer to its parent, and the
-  // timer ID for the node.  It also holds the cumulative time spent at
-  // this node and below.  The children are always at a higher index in
-  // the TimerTreeNode::Nodes array, and the parent is always at a lower
-  // index.
+  /// TimerTreeNode represents an interior or leaf node in the call tree.
+  /// It contains a list of children, a pointer to its parent, and the
+  /// timer ID for the node.  It also holds the cumulative time spent at
+  /// this node and below.  The children are always at a higher index in
+  /// the TimerTreeNode::Nodes array, and the parent is always at a lower
+  /// index.
   class TimerTreeNode {
     TimerTreeNode &operator=(const TimerTreeNode &) = delete;
 
@@ -84,12 +85,12 @@
   double FirstTimestamp;
   double LastTimestamp;
   uint64_t StateChangeCount = 0;
-  // IDsIndex maps a symbolic timer name to its integer ID.
+  /// IDsIndex maps a symbolic timer name to its integer ID.
   std::map<IceString, TimerIdT> IDsIndex;
-  std::vector<IceString> IDs;       // indexed by TimerIdT
-  std::vector<TimerTreeNode> Nodes; // indexed by TTindex
-  std::vector<double> LeafTimes;    // indexed by TimerIdT
-  std::vector<size_t> LeafCounts;   // indexed by TimerIdT
+  std::vector<IceString> IDs;       ///< indexed by TimerIdT
+  std::vector<TimerTreeNode> Nodes; ///< indexed by TTindex
+  std::vector<double> LeafTimes;    ///< indexed by TimerIdT
+  std::vector<size_t> LeafCounts;   ///< indexed by TimerIdT
   TTindex StackTop = 0;
 };
 
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index a00fd6b..a2db1a2 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the general driver class for translating ICE to
-// machine code.
-//
+///
+/// \file
+/// This file defines the general driver class for translating ICE to
+/// machine code.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTranslator.h"
diff --git a/src/IceTranslator.h b/src/IceTranslator.h
index 29b2d77..9fe46ee 100644
--- a/src/IceTranslator.h
+++ b/src/IceTranslator.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the general driver class for translating ICE to
-// machine code.
-//
+///
+/// \file
+/// This file declares the general driver class for translating ICE to
+/// machine code.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETRANSLATOR_H
@@ -29,9 +30,9 @@
 class VariableDeclaration;
 class GlobalContext;
 
-// Base class for translating ICE to machine code.  Derived classes convert
-// other intermediate representations down to ICE, and then call the appropriate
-// (inherited) methods to convert ICE into machine instructions.
+/// Base class for translating ICE to machine code.  Derived classes convert
+/// other intermediate representations down to ICE, and then call the
+/// appropriate (inherited) methods to convert ICE into machine instructions.
 class Translator {
   Translator() = delete;
   Translator(const Translator &) = delete;
@@ -70,7 +71,7 @@
 protected:
   GlobalContext *Ctx;
   uint32_t NextSequenceNumber;
-  // ErrorCode of the translation.
+  /// ErrorCode of the translation.
   ErrorCode ErrorStatus;
 };
 
diff --git a/src/IceTypeConverter.cpp b/src/IceTypeConverter.cpp
index 4341dcb..ec9e37b 100644
--- a/src/IceTypeConverter.cpp
+++ b/src/IceTypeConverter.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements how to convert LLVM types to ICE types, and ICE types
-// to LLVM types.
-//
+///
+/// \file
+/// This file implements how to convert LLVM types to ICE types, and ICE types
+/// to LLVM types.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTypeConverter.h"
diff --git a/src/IceTypeConverter.h b/src/IceTypeConverter.h
index 18e3e00..81dde92 100644
--- a/src/IceTypeConverter.h
+++ b/src/IceTypeConverter.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines how to convert LLVM types to ICE types, and ICE types
-// to LLVM types.
-//
+///
+/// \file
+/// This file defines how to convert LLVM types to ICE types, and ICE types
+/// to LLVM types.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETYPECONVERTER_H
@@ -45,13 +46,13 @@
   }
 
 private:
-  // The mapping from LLVM types to corresopnding Ice types.
+  /// The mapping from LLVM types to corresponding Ice types.
   std::map<llvm::Type *, Type> LLVM2IceMap;
 
-  // Add LLVM/ICE pair to internal tables.
+  /// Add LLVM/ICE pair to internal tables.
   void addLLVMType(Type Ty, llvm::Type *LLVMTy);
 
-  // Converts types not in LLVM2IceMap.
+  /// Converts types not in LLVM2IceMap.
   Type convertToIceTypeOther(llvm::Type *LLVMTy) const;
 };
 
diff --git a/src/IceTypes.cpp b/src/IceTypes.cpp
index 1b0ffb3..2099cb9 100644
--- a/src/IceTypes.cpp
+++ b/src/IceTypes.cpp
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines a few attributes of Subzero primitive types.
-//
+///
+/// \file
+/// This file defines a few attributes of Subzero primitive types.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceTypes.h"
diff --git a/src/IceTypes.h b/src/IceTypes.h
index 7daa8a7..1430094 100644
--- a/src/IceTypes.h
+++ b/src/IceTypes.h
@@ -6,11 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares a few properties of the primitive types allowed
-// in Subzero.  Every Subzero source file is expected to include
-// IceTypes.h.
-//
+///
+/// \file
+/// This file declares a few properties of the primitive types allowed
+/// in Subzero.  Every Subzero source file is expected to include
+/// IceTypes.h.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICETYPES_H
@@ -41,8 +42,8 @@
   return Stream << targetArchString(Arch);
 }
 
-// The list of all target instruction sets. Individual targets will
-// map this to include only what is valid for the target.
+/// The list of all target instruction sets. Individual targets will
+/// map this to include only what is valid for the target.
 enum TargetInstructionSet {
   // Represents baseline that can be assumed for a target (usually "Begin").
   BaseInstructionSet,
@@ -88,28 +89,28 @@
 /// Returns the number of bits in a scalar integer type.
 SizeT getScalarIntBitWidth(Type Ty);
 
-// Check if a type is byte sized (slight optimization over typeWidthInBytes).
+/// Check if a type is byte sized (slight optimization over typeWidthInBytes).
 inline bool isByteSizedType(Type Ty) {
   bool result = Ty == IceType_i8 || Ty == IceType_i1;
   assert(result == (1 == typeWidthInBytes(Ty)));
   return result;
 }
 
-// Check if Ty is byte sized and specifically i8. Assert that it's not
-// byte sized due to being an i1.
+/// Check if Ty is byte sized and specifically i8. Assert that it's not
+/// byte sized due to being an i1.
 inline bool isByteSizedArithType(Type Ty) {
   assert(Ty != IceType_i1);
   return Ty == IceType_i8;
 }
 
-// Return true if Ty is i32. This asserts that Ty is either i32 or i64.
+/// Return true if Ty is i32. This asserts that Ty is either i32 or i64.
 inline bool isInt32Asserting32Or64(Type Ty) {
   bool result = Ty == IceType_i32;
   assert(result || Ty == IceType_i64);
   return result;
 }
 
-// Return true if Ty is f32. This asserts that Ty is either f32 or f64.
+/// Return true if Ty is f32. This asserts that Ty is either f32 or f64.
 inline bool isFloat32Asserting32Or64(Type Ty) {
   bool result = Ty == IceType_f32;
   assert(result || Ty == IceType_f64);
@@ -129,8 +130,8 @@
 public:
   typedef std::vector<Type> ArgListType;
 
-  // Creates a function signature type with the given return type.
-  // Parameter types should be added using calls to appendArgType.
+  /// Creates a function signature type with the given return type.
+  /// Parameter types should be added using calls to appendArgType.
   FuncSigType() = default;
   FuncSigType(const FuncSigType &Ty) = default;
 
@@ -147,9 +148,9 @@
   void dump(Ostream &Stream) const;
 
 private:
-  // The return type.
+  /// The return type.
   Type ReturnType = IceType_void;
-  // The list of parameters.
+  /// The list of parameters.
   ArgListType ArgList;
 };
 
diff --git a/src/IceUtils.h b/src/IceUtils.h
index 6b0d729..6d65ade 100644
--- a/src/IceUtils.h
+++ b/src/IceUtils.h
@@ -6,9 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares some utility functions.
-//
+///
+/// \file
+/// This file declares some utility functions.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEUTILS_H
@@ -18,11 +19,11 @@
 
 namespace Ice {
 
-// Similar to bit_cast, but allows copying from types of unrelated
-// sizes. This method was introduced to enable the strict aliasing
-// optimizations of GCC 4.4. Basically, GCC mindlessly relies on
-// obscure details in the C++ standard that make reinterpret_cast
-// virtually useless.
+/// Similar to bit_cast, but allows copying from types of unrelated
+/// sizes. This method was introduced to enable the strict aliasing
+/// optimizations of GCC 4.4. Basically, GCC mindlessly relies on
+/// obscure details in the C++ standard that make reinterpret_cast
+/// virtually useless.
 template <class D, class S> inline D bit_copy(const S &source) {
   D destination;
   // This use of memcpy is safe: source and destination cannot overlap.
@@ -37,7 +38,7 @@
   Utils &operator=(const Utils &) = delete;
 
 public:
-  // Check whether an N-bit two's-complement representation can hold value.
+  /// Check whether an N-bit two's-complement representation can hold value.
   template <typename T> static inline bool IsInt(int N, T value) {
     assert((0 < N) &&
            (static_cast<unsigned int>(N) < (CHAR_BIT * sizeof(value))));
@@ -52,8 +53,8 @@
     return (0 <= value) && (value < limit);
   }
 
-  // Check whether the magnitude of value fits in N bits, i.e., whether an
-  // (N+1)-bit sign-magnitude representation can hold value.
+  /// Check whether the magnitude of value fits in N bits, i.e., whether an
+  /// (N+1)-bit sign-magnitude representation can hold value.
   template <typename T> static inline bool IsAbsoluteUint(int N, T Value) {
     assert((0 < N) &&
            (static_cast<unsigned int>(N) < (CHAR_BIT * sizeof(Value))));
@@ -62,27 +63,27 @@
     return IsUint(N, Value);
   }
 
-  // Return true if the addition X + Y will cause integer overflow for
-  // integers of type T.
+  /// Return true if the addition X + Y will cause integer overflow for
+  /// integers of type T.
   template <typename T> static inline bool WouldOverflowAdd(T X, T Y) {
     return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
             (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
   }
 
-  // Return true if X is already aligned by N, where N is a power of 2.
+  /// Return true if X is already aligned by N, where N is a power of 2.
   template <typename T> static inline bool IsAligned(T X, intptr_t N) {
     assert(llvm::isPowerOf2_64(N));
     return (X & (N - 1)) == 0;
   }
 
-  // Return Value adjusted to the next highest multiple of Alignment.
+  /// Return Value adjusted to the next highest multiple of Alignment.
   static inline uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
     assert(llvm::isPowerOf2_32(Alignment));
     return (Value + Alignment - 1) & -Alignment;
   }
 
-  // Return amount which must be added to adjust Pos to the next highest
-  // multiple of Align.
+  /// Return amount which must be added to adjust Pos to the next highest
+  /// multiple of Align.
   static inline uint64_t OffsetToAlignment(uint64_t Pos, uint64_t Align) {
     assert(llvm::isPowerOf2_64(Align));
     uint64_t Mod = Pos & (Align - 1);
@@ -91,15 +92,15 @@
     return Align - Mod;
   }
 
-  // Rotate the value bit pattern to the left by shift bits.
-  // Precondition: 0 <= shift < 32
+  /// Rotate the value bit pattern to the left by shift bits.
+  /// Precondition: 0 <= shift < 32
   static inline uint32_t rotateLeft32(uint32_t value, uint32_t shift) {
     if (shift == 0)
       return value;
     return (value << shift) | (value >> (32 - shift));
   }
 
-  // Rotate the value bit pattern to the right by shift bits.
+  /// Rotate the value bit pattern to the right by shift bits.
   static inline uint32_t rotateRight32(uint32_t value, uint32_t shift) {
     if (shift == 0)
       return value;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index fea11d0..11e0b8e 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the PNaCl bitcode file to Ice, to machine code
-// translator.
-//
+///
+/// \file
+/// This file implements the PNaCl bitcode file to Ice, to machine code
+/// translator.
+///
 //===----------------------------------------------------------------------===//
 
 #include "PNaClTranslator.h"
diff --git a/src/PNaClTranslator.h b/src/PNaClTranslator.h
index b8764a6..b6286e8 100644
--- a/src/PNaClTranslator.h
+++ b/src/PNaClTranslator.h
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file declares the PNaCl bitcode file to ICE, to machine code
-// translator.
-//
+///
+/// \file
+/// This file declares the PNaCl bitcode file to ICE, to machine code
+/// translator.
+///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_PNACLTRANSLATOR_H
@@ -34,13 +35,13 @@
 public:
   explicit PNaClTranslator(GlobalContext *Ctx) : Translator(Ctx) {}
 
-  // Reads the PNaCl bitcode file and translates to ICE, which is then
-  // converted to machine code. Sets ErrorStatus to 1 if any errors
-  // occurred. Takes ownership of the MemoryObject.
+  /// Reads the PNaCl bitcode file and translates to ICE, which is then
+  /// converted to machine code. Sets ErrorStatus to 1 if any errors
+  /// occurred. Takes ownership of the MemoryObject.
   void translate(const std::string &IRFilename,
                  std::unique_ptr<llvm::MemoryObject> &&MemoryObject);
 
-  // Reads MemBuf, assuming it is the PNaCl bitcode contents of IRFilename.
+  /// Reads MemBuf, assuming it is the PNaCl bitcode contents of IRFilename.
   void translateBuffer(const std::string &IRFilename,
                        llvm::MemoryBuffer *MemBuf);
 };
diff --git a/src/main.cpp b/src/main.cpp
index c5ee1e4..a6cb490 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -6,10 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the entry point for translating PNaCl bitcode into
-// native code.
-//
+///
+/// \file
+/// This file defines the entry point for translating PNaCl bitcode into
+/// native code.
+///
 //===----------------------------------------------------------------------===//
 
 #include "IceBrowserCompileServer.h"