7023898: Intrinsify AtomicLongFieldUpdater.getAndIncrement()

Use shorter instruction sequences for atomic add and atomic exchange when possible.
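
For example (a sketch; the exact library plumbing varies by JDK build, but in
the JDK 8 era AtomicLong.getAndIncrement() bottoms out in
Unsafe.getAndAddLong()):

    AtomicLong counter = new AtomicLong();
    // Previously compiled as a compareAndSwapLong() retry loop; with this
    // change C1/C2 can emit a single atomic add where the hardware has
    // one (e.g. LOCK XADD on x86_64).
    counter.getAndIncrement();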

Reviewed-by: kvn, jrose
diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
index bccbe13..63359d5 100644
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
@@ -347,7 +347,11 @@
 inline void Assembler::swap(    Register s1, Register s2, Register d) { v9_dep();  emit_long( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::swap(    Register s1, int simm13a, Register d) { v9_dep();  emit_data( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::swap(    Address& a, Register d, int offset ) { relocate(a.rspec(offset)); swap(  a.base(), a.disp() + offset, d ); }
+inline void Assembler::swap(    Address& a, Register d, int offset ) {
+  relocate(a.rspec(offset));
+  if (a.has_index()) { assert(offset == 0, ""); swap( a.base(), a.index(), d         ); }
+  else               {                          swap( a.base(), a.disp() + offset, d ); }
+}
 
 
 // Use the right loads/stores for the platform
diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
index 2890c56..61ec673 100644
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp
@@ -1315,7 +1315,13 @@
 
 Address LIR_Assembler::as_Address(LIR_Address* addr) {
   Register reg = addr->base()->as_register();
-  return Address(reg, addr->disp());
+  LIR_Opr index = addr->index();
+  if (index->is_illegal()) {
+    return Address(reg, addr->disp());
+  } else {
+    assert (addr->disp() == 0, "unsupported address mode");
+    return Address(reg, index->as_pointer_register());
+  }
 }
 
 
@@ -3438,7 +3444,28 @@
   }
 }
 
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  LIR_Address* addr = src->as_address_ptr();
 
+  assert(data == dest, "swap uses only 2 operands");
+  assert (code == lir_xchg, "no xadd on sparc");
 
+  if (data->type() == T_INT) {
+    __ swap(as_Address(addr), data->as_register());
+  } else if (data->is_oop()) {
+    Register obj = data->as_register();
+    Register narrow = tmp->as_register();
+#ifdef _LP64
+    assert(UseCompressedOops, "swap is 32bit only");
+    __ encode_heap_oop(obj, narrow);
+    __ swap(as_Address(addr), narrow);
+    __ decode_heap_oop(narrow, obj);
+#else
+    __ swap(as_Address(addr), obj);
+#endif
+  } else {
+    ShouldNotReachHere();
+  }
+}
 
 #undef __
diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
index 75b7b6c..2d4b3a2 100644
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
@@ -1204,3 +1204,58 @@
     __ load(addr, dst);
   }
 }
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  value.load_item();
+  off.load_nonconstant();
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  LIR_Opr offset = off.result();
+
+  if (data != dst) {
+    __ move(data, dst);
+    data = dst;
+  }
+
+  assert (!x->is_add() && (type == T_INT || (is_obj LP64_ONLY(&& UseCompressedOops))), "unexpected type");
+  LIR_Address* addr;
+  if (offset->is_constant()) {
+
+#ifdef _LP64
+    jlong l = offset->as_jlong();
+    assert((jlong)((jint)l) == l, "offset too large for constant");
+    jint c = (jint)l;
+#else
+    jint c = offset->as_jint();
+#endif
+    addr = new LIR_Address(src.result(), c, type);
+  } else {
+    addr = new LIR_Address(src.result(), offset, type);
+  }
+
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+  LIR_Opr ptr = LIR_OprFact::illegalOpr;
+
+  if (is_obj) {
+    // Do the pre-write barrier, if any.
+    // barriers on sparc don't work with a base + index address
+    tmp = FrameMap::G3_opr;
+    ptr = new_pointer_register();
+    __ add(src.result(), off.result(), ptr);
+    pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+  __ xchg(LIR_OprFact::address(addr), data, dst, tmp);
+  if (is_obj) {
+    // Seems to be a precise address
+    post_barrier(ptr, data);
+  }
+}
diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad
index 42b4cf8..f7040f5 100644
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@@ -1838,6 +1838,12 @@
   case Op_PopCountL:
     if (!UsePopCountInstruction)
       return false;
+  case Op_CompareAndSwapL:
+#ifdef _LP64
+  case Op_CompareAndSwapP:
+#endif
+    if (!VM_Version::supports_cx8())
+      return false;
     break;
   }
 
@@ -7199,6 +7205,7 @@
 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 
 instruct compareAndSwapL_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+  predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
   effect( USE mem_ptr, KILL ccr, KILL tmp1);
   format %{
@@ -7230,6 +7237,9 @@
 %}
 
 instruct compareAndSwapP_bool(iRegP mem_ptr, iRegP oldval, iRegP newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
+#ifdef _LP64
+  predicate(VM_Version::supports_cx8());
+#endif
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
   effect( USE mem_ptr, KILL ccr, KILL tmp1);
   format %{
@@ -7264,6 +7274,38 @@
   ins_pipe( long_memory_op );
 %}
 
+instruct xchgI( memory mem, iRegI newval) %{
+  match(Set newval (GetAndSetI mem newval));
+  format %{ "SWAP  [$mem],$newval" %}
+  size(4);
+  ins_encode %{
+    __ swap($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
+#ifndef _LP64
+instruct xchgP( memory mem, iRegP newval) %{
+  match(Set newval (GetAndSetP mem newval));
+  format %{ "SWAP  [$mem],$newval" %}
+  size(4);
+  ins_encode %{
+    __ swap($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+#endif
+
+instruct xchgN( memory mem, iRegN newval) %{
+  match(Set newval (GetAndSetN mem newval));
+  format %{ "SWAP  [$mem],$newval" %}
+  size(4);
+  ins_encode %{
+    __ swap($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( long_memory_op );
+%}
+
 //---------------------
 // Subtraction Instructions
 // Register Subtraction
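
Note that the SPARC swap instruction exchanges only 32 bits, which is why
only GetAndSetI, GetAndSetN and (on 32-bit VMs) GetAndSetP are matched
above, and why this file adds no GetAndAdd rules at all. At the Java level
that means roughly (a sketch, assuming the intrinsics fire):

    AtomicInteger i = new AtomicInteger();
    i.getAndSet(42);     // single SWAP instruction
    AtomicLong l = new AtomicLong();
    l.getAndSet(42L);    // no 64-bit swap on SPARC: remains a CAS loop
    l.getAndIncrement(); // likewise: no xadd instruction on SPARC
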
diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
index ca93deb..b07bdda 100644
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
@@ -96,6 +96,7 @@
   UseSSE = 0; // Only on x86 and x64
 
   _supports_cx8 = has_v9();
+  _supports_atomic_getset4 = true; // swap instruction
 
   if (is_niagara()) {
     // Indirect branch is the same cost as direct
diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
index 36bd4d7..60eab09 100644
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@@ -3794,5 +3794,49 @@
   // do nothing for now
 }
 
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
+  assert(data == dest, "xchg/xadd uses only 2 operands");
+
+  if (data->type() == T_INT) {
+    if (code == lir_xadd) {
+      if (os::is_MP()) {
+        __ lock();
+      }
+      __ xaddl(as_Address(src->as_address_ptr()), data->as_register());
+    } else {
+      __ xchgl(data->as_register(), as_Address(src->as_address_ptr()));
+    }
+  } else if (data->is_oop()) {
+    assert (code == lir_xchg, "xadd for oops");
+    Register obj = data->as_register();
+#ifdef _LP64
+    if (UseCompressedOops) {
+      __ encode_heap_oop(obj);
+      __ xchgl(obj, as_Address(src->as_address_ptr()));
+      __ decode_heap_oop(obj);
+    } else {
+      __ xchgptr(obj, as_Address(src->as_address_ptr()));
+    }
+#else
+    __ xchgl(obj, as_Address(src->as_address_ptr()));
+#endif
+  } else if (data->type() == T_LONG) {
+#ifdef _LP64
+    assert(data->as_register_lo() == data->as_register_hi(), "should be a single register");
+    if (code == lir_xadd) {
+      if (os::is_MP()) {
+        __ lock();
+      }
+      __ xaddq(as_Address(src->as_address_ptr()), data->as_register_lo());
+    } else {
+      __ xchgq(data->as_register_lo(), as_Address(src->as_address_ptr()));
+    }
+#else
+    ShouldNotReachHere();
+#endif
+  } else {
+    ShouldNotReachHere();
+  }
+}
 
 #undef __
diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
index d363c0e..d185613 100644
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
@@ -753,9 +753,24 @@
   LIR_Opr addr = new_pointer_register();
   LIR_Address* a;
   if(offset.result()->is_constant()) {
+#ifdef _LP64
+    jlong c = offset.result()->as_jlong();
+    if ((jlong)((jint)c) == c) {
+      a = new LIR_Address(obj.result(),
+                          (jint)c,
+                          as_BasicType(type));
+    } else {
+      LIR_Opr tmp = new_register(T_LONG);
+      __ move(offset.result(), tmp);
+      a = new LIR_Address(obj.result(),
+                          tmp,
+                          as_BasicType(type));
+    }
+#else
     a = new LIR_Address(obj.result(),
-                        NOT_LP64(offset.result()->as_constant_ptr()->as_jint()) LP64_ONLY((int)offset.result()->as_constant_ptr()->as_jlong()),
+                        offset.result()->as_jint(),
                         as_BasicType(type));
+#endif
   } else {
     a = new LIR_Address(obj.result(),
                         offset.result(),
@@ -1345,3 +1360,57 @@
     }
   }
 }
+
+void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  BasicType type = x->basic_type();
+  LIRItem src(x->object(), this);
+  LIRItem off(x->offset(), this);
+  LIRItem value(x->value(), this);
+
+  src.load_item();
+  value.load_item();
+  off.load_nonconstant();
+
+  LIR_Opr dst = rlock_result(x, type);
+  LIR_Opr data = value.result();
+  bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+  LIR_Opr offset = off.result();
+
+  assert (type == T_INT || (!x->is_add() && is_obj) LP64_ONLY( || type == T_LONG ), "unexpected type");
+  LIR_Address* addr;
+  if (offset->is_constant()) {
+#ifdef _LP64
+    jlong c = offset->as_jlong();
+    if ((jlong)((jint)c) == c) {
+      addr = new LIR_Address(src.result(), (jint)c, type);
+    } else {
+      LIR_Opr tmp = new_register(T_LONG);
+      __ move(offset, tmp);
+      addr = new LIR_Address(src.result(), tmp, type);
+    }
+#else
+    addr = new LIR_Address(src.result(), offset->as_jint(), type);
+#endif
+  } else {
+    addr = new LIR_Address(src.result(), offset, type);
+  }
+
+  if (data != dst) {
+    __ move(data, dst);
+    data = dst;
+  }
+  if (x->is_add()) {
+    __ xadd(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr);
+  } else {
+    if (is_obj) {
+      // Do the pre-write barrier, if any.
+      pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
+    }
+    __ xchg(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr);
+    if (is_obj) {
+      // Seems to be a precise address
+      post_barrier(LIR_OprFact::address(addr), data);
+    }
+  }
+}
diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
index a5de1ee..bf7b3c2 100644
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
@@ -363,6 +363,11 @@
   }
 
   _supports_cx8 = supports_cmpxchg8();
+  // xchg and xadd instructions
+  _supports_atomic_getset4 = true;
+  _supports_atomic_getadd4 = true;
+  LP64_ONLY(_supports_atomic_getset8 = true);
+  LP64_ONLY(_supports_atomic_getadd8 = true);
 
 #ifdef _LP64
   // OS should support SSE for x64 and hardware should support at least SSE2.
diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad
index 9a057a4..c383100 100644
--- a/hotspot/src/cpu/x86/vm/x86.ad
+++ b/hotspot/src/cpu/x86/vm/x86.ad
@@ -503,6 +503,13 @@
       if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
         return false;
     break;
+    case Op_CompareAndSwapL:
+#ifdef _LP64
+    case Op_CompareAndSwapP:
+#endif
+      if (!VM_Version::supports_cx8())
+        return false;
+    break;
   }
 
   return true;  // Per default match rules are supported.
diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad
index e82d799..f5019cb 100644
--- a/hotspot/src/cpu/x86/vm/x86_32.ad
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad
@@ -7762,6 +7762,7 @@
 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 
 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+  predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7798,6 +7799,47 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
+instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(KILL cr);
+  format %{ "ADDL  [$mem],$add" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ addl($mem$$Address, $add$$constant);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
+  match(Set newval (GetAndAddI mem newval));
+  effect(KILL cr);
+  format %{ "XADDL  [$mem],$newval" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ xaddl($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgI( memory mem, rRegI newval) %{
+  match(Set newval (GetAndSetI mem newval));
+  format %{ "XCHGL  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgl($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgP( memory mem, pRegP newval) %{
+  match(Set newval (GetAndSetP mem newval));
+  format %{ "XCHGL  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgl($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
 //----------Subtraction Instructions-------------------------------------------
 // Integer Subtraction Instructions
 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad
index 8c0fb37..dc2d4d8 100644
--- a/hotspot/src/cpu/x86/vm/x86_64.ad
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad
@@ -7242,6 +7242,7 @@
                          rax_RegP oldval, rRegP newval,
                          rFlagsReg cr)
 %{
+  predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
@@ -7265,6 +7266,7 @@
                          rax_RegL oldval, rRegL newval,
                          rFlagsReg cr)
 %{
+  predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
@@ -7329,6 +7331,88 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
+instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddI mem add));
+  effect(KILL cr);
+  format %{ "ADDL  [$mem],$add" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ addl($mem$$Address, $add$$constant);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
+  match(Set newval (GetAndAddI mem newval));
+  effect(KILL cr);
+  format %{ "XADDL  [$mem],$newval" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ xaddl($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xaddL_no_res( memory mem, Universe dummy, immL add, rFlagsReg cr) %{
+  predicate(n->as_LoadStore()->result_not_used());
+  match(Set dummy (GetAndAddL mem add));
+  effect(KILL cr);
+  format %{ "ADDQ  [$mem],$add" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ addq($mem$$Address, $add$$constant);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
+  match(Set newval (GetAndAddL mem newval));
+  effect(KILL cr);
+  format %{ "XADDQ  [$mem],$newval" %}
+  ins_encode %{
+    if (os::is_MP()) { __ lock(); }
+    __ xaddq($mem$$Address, $newval$$Register);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgI( memory mem, rRegI newval) %{
+  match(Set newval (GetAndSetI mem newval));
+  format %{ "XCHGL  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgl($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgL( memory mem, rRegL newval) %{
+  match(Set newval (GetAndSetL mem newval));
+  format %{ "XCHGL  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgq($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgP( memory mem, rRegP newval) %{
+  match(Set newval (GetAndSetP mem newval));
+  format %{ "XCHGQ  $newval,[$mem]" %}
+  ins_encode %{
+    __ xchgq($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct xchgN( memory mem, rRegN newval) %{
+  match(Set newval (GetAndSetN mem newval));
+  format %{ "XCHGL  $newval,$mem]" %}
+  ins_encode %{
+    __ xchgl($newval$$Register, $mem$$Address);
+  %}
+  ins_pipe( pipe_cmpxchg );
+%}
+
 //----------Subtraction Instructions-------------------------------------------
 
 // Integer Subtraction Instructions
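
The *_no_res variants above exploit n->as_LoadStore()->result_not_used():
when the fetched value is dead and the delta is a constant (immI/immL), a
plain locked ADD suffices and no register is tied up by XADD's result. A
sketch of the difference at a call site:

    AtomicInteger hits = new AtomicInteger();
    hits.getAndIncrement();         // result unused: LOCK ADD [mem], 1
    int n = hits.getAndIncrement(); // result used:   LOCK XADD [mem], reg
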
diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp
index cccce31..824306c 100644
--- a/hotspot/src/share/vm/adlc/formssel.cpp
+++ b/hotspot/src/share/vm/adlc/formssel.cpp
@@ -751,6 +751,7 @@
         !strcmp(_matrule->_rChild->_opType,"DecodeN")    ||
         !strcmp(_matrule->_rChild->_opType,"EncodeP")    ||
         !strcmp(_matrule->_rChild->_opType,"LoadN")      ||
+        !strcmp(_matrule->_rChild->_opType,"GetAndSetN") ||
         !strcmp(_matrule->_rChild->_opType,"LoadNKlass") ||
         !strcmp(_matrule->_rChild->_opType,"CreateEx")   ||  // type of exception
         !strcmp(_matrule->_rChild->_opType,"CheckCastPP")) ) return true;
@@ -3399,7 +3400,9 @@
     "StorePConditional", "StoreIConditional", "StoreLConditional",
     "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
     "StoreCM",
-    "ClearArray"
+    "ClearArray",
+    "GetAndAddI", "GetAndSetI", "GetAndSetP",
+    "GetAndAddL", "GetAndSetL", "GetAndSetN",
   };
   int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
   if( strcmp(_opType,"PrefetchRead")==0 ||
diff --git a/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp b/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp
index 7d9ddf8..1fcdd64 100644
--- a/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp
+++ b/hotspot/src/share/vm/c1/c1_Canonicalizer.cpp
@@ -931,6 +931,7 @@
 void Canonicalizer::do_UnsafePutRaw(UnsafePutRaw* x) { if (OptimizeUnsafes) do_UnsafeRawOp(x); }
 void Canonicalizer::do_UnsafeGetObject(UnsafeGetObject* x) {}
 void Canonicalizer::do_UnsafePutObject(UnsafePutObject* x) {}
+void Canonicalizer::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {}
 void Canonicalizer::do_UnsafePrefetchRead (UnsafePrefetchRead*  x) {}
 void Canonicalizer::do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) {}
 void Canonicalizer::do_ProfileCall(ProfileCall* x) {}
diff --git a/hotspot/src/share/vm/c1/c1_Canonicalizer.hpp b/hotspot/src/share/vm/c1/c1_Canonicalizer.hpp
index b7ab8f5..b7c9f7e 100644
--- a/hotspot/src/share/vm/c1/c1_Canonicalizer.hpp
+++ b/hotspot/src/share/vm/c1/c1_Canonicalizer.hpp
@@ -100,6 +100,7 @@
   virtual void do_UnsafePutRaw   (UnsafePutRaw*    x);
   virtual void do_UnsafeGetObject(UnsafeGetObject* x);
   virtual void do_UnsafePutObject(UnsafePutObject* x);
+  virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x);
   virtual void do_UnsafePrefetchRead (UnsafePrefetchRead*  x);
   virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x);
   virtual void do_ProfileCall    (ProfileCall*     x);
diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
index 636fed2..95d9902 100644
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
@@ -3383,6 +3383,41 @@
       append_unsafe_CAS(callee);
       return true;
 
+    case vmIntrinsics::_getAndAddInt:
+      if (!VM_Version::supports_atomic_getadd4()) {
+        return false;
+      }
+      return append_unsafe_get_and_set_obj(callee, true);
+    case vmIntrinsics::_getAndAddLong:
+      if (!VM_Version::supports_atomic_getadd8()) {
+        return false;
+      }
+      return append_unsafe_get_and_set_obj(callee, true);
+    case vmIntrinsics::_getAndSetInt:
+      if (!VM_Version::supports_atomic_getset4()) {
+        return false;
+      }
+      return append_unsafe_get_and_set_obj(callee, false);
+    case vmIntrinsics::_getAndSetLong:
+      if (!VM_Version::supports_atomic_getset8()) {
+        return false;
+      }
+      return append_unsafe_get_and_set_obj(callee, false);
+    case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+      if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) {
+        return false;
+      }
+      if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) {
+        return false;
+      }
+#else
+      if (!VM_Version::supports_atomic_getset4()) {
+        return false;
+      }
+#endif
+      return append_unsafe_get_and_set_obj(callee, false);
+
     case vmIntrinsics::_Reference_get:
       // Use the intrinsic version of Reference.get() so that the value in
       // the referent field can be registered by the G1 pre-barrier code.
@@ -4106,6 +4141,22 @@
   }
 }
 
+bool GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) {
+  if (InlineUnsafeOps) {
+    Values* args = state()->pop_arguments(callee->arg_size());
+    BasicType t = callee->return_type()->basic_type();
+    null_check(args->at(0));
+    Instruction* offset = args->at(2);
+#ifndef _LP64
+    offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+#endif
+    Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add));
+    compilation()->set_has_unsafe_access(true);
+    kill_all();
+    push(op->type(), op);
+  }
+  return InlineUnsafeOps;
+}
 
 #ifndef PRODUCT
 void GraphBuilder::print_stats() {
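
The VM_Version checks above decide whether C1 inlines these Unsafe calls at
all; when a capability is missing, execution simply stays on the library's
CAS-loop fallback, which looks roughly like this (paraphrased from the
JDK 8 era sun.misc.Unsafe; details vary by build):

    public final int getAndAddInt(Object o, long offset, int delta) {
        int v;
        do {
            v = getIntVolatile(o, offset);
        } while (!compareAndSwapInt(o, offset, v, v + delta));
        return v;
    }
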
diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp
index 8d7e7de..0997058 100644
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.hpp
@@ -367,6 +367,7 @@
   bool append_unsafe_put_raw(ciMethod* callee, BasicType t);
   bool append_unsafe_prefetch(ciMethod* callee, bool is_store, bool is_static);
   void append_unsafe_CAS(ciMethod* callee);
+  bool append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
 
   void print_inlining(ciMethod* callee, const char* msg, bool success = true);
 
diff --git a/hotspot/src/share/vm/c1/c1_Instruction.hpp b/hotspot/src/share/vm/c1/c1_Instruction.hpp
index 930a599..4fff026 100644
--- a/hotspot/src/share/vm/c1/c1_Instruction.hpp
+++ b/hotspot/src/share/vm/c1/c1_Instruction.hpp
@@ -102,6 +102,7 @@
 class     UnsafeObjectOp;
 class       UnsafeGetObject;
 class       UnsafePutObject;
+class         UnsafeGetAndSetObject;
 class       UnsafePrefetch;
 class         UnsafePrefetchRead;
 class         UnsafePrefetchWrite;
@@ -202,6 +203,7 @@
   virtual void do_UnsafePutRaw   (UnsafePutRaw*    x) = 0;
   virtual void do_UnsafeGetObject(UnsafeGetObject* x) = 0;
   virtual void do_UnsafePutObject(UnsafePutObject* x) = 0;
+  virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) = 0;
   virtual void do_UnsafePrefetchRead (UnsafePrefetchRead*  x) = 0;
   virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) = 0;
   virtual void do_ProfileCall    (ProfileCall*     x) = 0;
@@ -2273,6 +2275,27 @@
                                                    f->visit(&_value); }
 };
 
+LEAF(UnsafeGetAndSetObject, UnsafeObjectOp)
+ private:
+  Value _value;                                  // Value to be stored
+  bool  _is_add;
+ public:
+  UnsafeGetAndSetObject(BasicType basic_type, Value object, Value offset, Value value, bool is_add)
+  : UnsafeObjectOp(basic_type, object, offset, false, false)
+    , _value(value)
+    , _is_add(is_add)
+  {
+    ASSERT_VALUES
+  }
+
+  // accessors
+  bool is_add() const                            { return _is_add; }
+  Value value()                                  { return _value; }
+
+  // generic
+  virtual void input_values_do(ValueVisitor* f)   { UnsafeObjectOp::input_values_do(f);
+                                                   f->visit(&_value); }
+};
 
 BASE(UnsafePrefetch, UnsafeObjectOp)
  public:
diff --git a/hotspot/src/share/vm/c1/c1_InstructionPrinter.cpp b/hotspot/src/share/vm/c1/c1_InstructionPrinter.cpp
index 383ffbb..a2f6f86 100644
--- a/hotspot/src/share/vm/c1/c1_InstructionPrinter.cpp
+++ b/hotspot/src/share/vm/c1/c1_InstructionPrinter.cpp
@@ -831,6 +831,12 @@
   output()->put(')');
 }
 
+void InstructionPrinter::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {
+  print_unsafe_object_op(x, x->is_add() ? "UnsafeGetAndSetObject (add)" : "UnsafeGetAndSetObject");
+  output()->print(", value ");
+  print_value(x->value());
+  output()->put(')');
+}
 
 void InstructionPrinter::do_UnsafePrefetchRead(UnsafePrefetchRead* x) {
   print_unsafe_object_op(x, "UnsafePrefetchRead");
diff --git a/hotspot/src/share/vm/c1/c1_InstructionPrinter.hpp b/hotspot/src/share/vm/c1/c1_InstructionPrinter.hpp
index 3d09ef0..a89908c 100644
--- a/hotspot/src/share/vm/c1/c1_InstructionPrinter.hpp
+++ b/hotspot/src/share/vm/c1/c1_InstructionPrinter.hpp
@@ -128,6 +128,7 @@
   virtual void do_UnsafePutRaw   (UnsafePutRaw*    x);
   virtual void do_UnsafeGetObject(UnsafeGetObject* x);
   virtual void do_UnsafePutObject(UnsafePutObject* x);
+  virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x);
   virtual void do_UnsafePrefetchRead (UnsafePrefetchRead*  x);
   virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x);
   virtual void do_ProfileCall    (ProfileCall*     x);
diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp
index 9bc7ab8..178d186 100644
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp
@@ -264,6 +264,7 @@
 #ifdef ASSERT
   switch (code()) {
     case lir_cmove:
+    case lir_xchg:
       break;
 
     default:
@@ -630,6 +631,8 @@
     case lir_shl:
     case lir_shr:
     case lir_ushr:
+    case lir_xadd:
+    case lir_xchg:
     {
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
@@ -641,6 +644,13 @@
       if (op2->_opr2->is_valid())         do_input(op2->_opr2);
       if (op2->_tmp1->is_valid())         do_temp(op2->_tmp1);
       if (op2->_result->is_valid())       do_output(op2->_result);
+      if (op->code() == lir_xchg || op->code() == lir_xadd) {
+        // On ARM and PPC the return value is loaded first, so it could
+        // destroy the inputs. On the other platforms that implement
+        // these ops (x86, sparc) the extra constraints are harmless.
+        if (op2->_opr1->is_valid())       do_temp(op2->_opr1);
+        if (op2->_opr2->is_valid())       do_temp(op2->_opr2);
+      }
 
       break;
     }
@@ -1733,6 +1743,8 @@
      case lir_shr:                   s = "shift_right";   break;
      case lir_ushr:                  s = "ushift_right";  break;
      case lir_alloc_array:           s = "alloc_array";   break;
+     case lir_xadd:                  s = "xadd";          break;
+     case lir_xchg:                  s = "xchg";          break;
      // LIR_Op3
      case lir_idiv:                  s = "idiv";          break;
      case lir_irem:                  s = "irem";          break;
diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp
index a79a8ac..122ebe8 100644
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp
@@ -963,6 +963,8 @@
       , lir_alloc_array
       , lir_throw
       , lir_compare_to
+      , lir_xadd
+      , lir_xchg
   , end_op2
   , begin_op3
       , lir_idiv
@@ -2191,6 +2193,9 @@
   void profile_call(ciMethod* method, int bci, ciMethod* callee, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) {
     append(new LIR_OpProfileCall(lir_profile_call, method, bci, callee, mdo, recv, t1, cha_klass));
   }
+
+  void xadd(LIR_Opr src, LIR_Opr add, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xadd, src, add, res, tmp)); }
+  void xchg(LIR_Opr src, LIR_Opr set, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xchg, src, set, res, tmp)); }
 };
 
 void print_LIR(BlockList* blocks);
@@ -2287,16 +2292,21 @@
       LIR_Address* address = opr->as_address_ptr();
       if (address != NULL) {
         // special handling for addresses: add base and index register of the address
-        // both are always input operands!
+        // both are always input operands or temp if we want to extend
+        // their liveness!
+        if (mode == outputMode) {
+          mode = inputMode;
+        }
+        assert (mode == inputMode || mode == tempMode, "input or temp only for addresses");
         if (address->_base->is_valid()) {
           assert(address->_base->is_register(), "must be");
-          assert(_oprs_len[inputMode] < maxNumberOfOperands, "array overflow");
-          _oprs_new[inputMode][_oprs_len[inputMode]++] = &address->_base;
+          assert(_oprs_len[mode] < maxNumberOfOperands, "array overflow");
+          _oprs_new[mode][_oprs_len[mode]++] = &address->_base;
         }
         if (address->_index->is_valid()) {
           assert(address->_index->is_register(), "must be");
-          assert(_oprs_len[inputMode] < maxNumberOfOperands, "array overflow");
-          _oprs_new[inputMode][_oprs_len[inputMode]++] = &address->_index;
+          assert(_oprs_len[mode] < maxNumberOfOperands, "array overflow");
+          _oprs_new[mode][_oprs_len[mode]++] = &address->_index;
         }
 
       } else {
diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp
index e67987d..a76f5bb 100644
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp
@@ -773,6 +773,11 @@
       throw_op(op->in_opr1(), op->in_opr2(), op->info());
       break;
 
+    case lir_xadd:
+    case lir_xchg:
+      atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr());
+      break;
+
     default:
       Unimplemented();
       break;
diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp
index 226e4f8..5cce9d0 100644
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp
@@ -252,6 +252,8 @@
 
   void verify_oop_map(CodeEmitInfo* info);
 
+  void atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp);
+
 #ifdef TARGET_ARCH_x86
 # include "c1_LIRAssembler_x86.hpp"
 #endif
diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
index ac20e7a..aedd6a6 100644
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
@@ -527,6 +527,7 @@
   virtual void do_UnsafePutRaw   (UnsafePutRaw*    x);
   virtual void do_UnsafeGetObject(UnsafeGetObject* x);
   virtual void do_UnsafePutObject(UnsafePutObject* x);
+  virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x);
   virtual void do_UnsafePrefetchRead (UnsafePrefetchRead*  x);
   virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x);
   virtual void do_ProfileCall    (ProfileCall*     x);
diff --git a/hotspot/src/share/vm/c1/c1_Optimizer.cpp b/hotspot/src/share/vm/c1/c1_Optimizer.cpp
index 487a7a7..3ba1765 100644
--- a/hotspot/src/share/vm/c1/c1_Optimizer.cpp
+++ b/hotspot/src/share/vm/c1/c1_Optimizer.cpp
@@ -505,6 +505,7 @@
   void do_UnsafePutRaw   (UnsafePutRaw*    x);
   void do_UnsafeGetObject(UnsafeGetObject* x);
   void do_UnsafePutObject(UnsafePutObject* x);
+  void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x);
   void do_UnsafePrefetchRead (UnsafePrefetchRead*  x);
   void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x);
   void do_ProfileCall    (ProfileCall*     x);
@@ -676,6 +677,7 @@
 void NullCheckVisitor::do_UnsafePutRaw   (UnsafePutRaw*    x) {}
 void NullCheckVisitor::do_UnsafeGetObject(UnsafeGetObject* x) {}
 void NullCheckVisitor::do_UnsafePutObject(UnsafePutObject* x) {}
+void NullCheckVisitor::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {}
 void NullCheckVisitor::do_UnsafePrefetchRead (UnsafePrefetchRead*  x) {}
 void NullCheckVisitor::do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) {}
 void NullCheckVisitor::do_ProfileCall    (ProfileCall*     x) { nce()->clear_last_explicit_null_check(); }
diff --git a/hotspot/src/share/vm/c1/c1_ValueMap.hpp b/hotspot/src/share/vm/c1/c1_ValueMap.hpp
index 6f89738..95fa732 100644
--- a/hotspot/src/share/vm/c1/c1_ValueMap.hpp
+++ b/hotspot/src/share/vm/c1/c1_ValueMap.hpp
@@ -157,6 +157,7 @@
   void do_Invoke         (Invoke*          x) { kill_memory(); }
   void do_UnsafePutRaw   (UnsafePutRaw*    x) { kill_memory(); }
   void do_UnsafePutObject(UnsafePutObject* x) { kill_memory(); }
+  void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { kill_memory(); }
   void do_Intrinsic      (Intrinsic*       x) { if (!x->preserves_state()) kill_memory(); }
 
   void do_Phi            (Phi*             x) { /* nothing to do */ }
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
index da50cba..1d175b4 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@@ -872,6 +872,20 @@
    do_name(     putOrderedInt_name,                              "putOrderedInt")                                       \
    do_alias(    putOrderedInt_signature,                        /*(Ljava/lang/Object;JI)V*/ putInt_signature)           \
                                                                                                                         \
+  do_intrinsic(_getAndAddInt,             sun_misc_Unsafe,        getAndAddInt_name, getAndAddInt_signature, F_R)       \
+   do_name(     getAndAddInt_name,                                "getAndAddInt")                                       \
+   do_signature(getAndAddInt_signature,                           "(Ljava/lang/Object;JI)I" )                           \
+  do_intrinsic(_getAndAddLong,            sun_misc_Unsafe,        getAndAddLong_name, getAndAddLong_signature, F_R)     \
+   do_name(     getAndAddLong_name,                               "getAndAddLong")                                      \
+   do_signature(getAndAddLong_signature,                          "(Ljava/lang/Object;JJ)J" )                           \
+  do_intrinsic(_getAndSetInt,             sun_misc_Unsafe,        getAndSet_name, getAndSetInt_signature, F_R)          \
+   do_name(     getAndSet_name,                                   "getAndSet")                                          \
+   do_alias(    getAndSetInt_signature,                         /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature)   \
+  do_intrinsic(_getAndSetLong,            sun_misc_Unsafe,        getAndSet_name, getAndSetLong_signature, F_R)         \
+   do_alias(    getAndSetLong_signature,                        /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature)  \
+  do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSet_name, getAndSetObject_signature,  F_R)      \
+   do_signature(getAndSetObject_signature,                        "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
+                                                                                                                        \
   /* prefetch_signature is shared by all prefetch variants */                                                           \
   do_signature( prefetch_signature,        "(Ljava/lang/Object;J)V")                                                    \
                                                                                                                         \
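
Reconstructed from the names and descriptors above, the sun.misc.Unsafe
methods being registered are (getAndSet is a single overloaded name, hence
the shared getAndSet_name with per-overload signatures):

    public final int    getAndAddInt(Object o, long offset, int delta);   // (Ljava/lang/Object;JI)I
    public final long   getAndAddLong(Object o, long offset, long delta); // (Ljava/lang/Object;JJ)J
    public final int    getAndSet(Object o, long offset, int newValue);
    public final long   getAndSet(Object o, long offset, long newValue);
    public final Object getAndSet(Object o, long offset, Object newValue);
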
diff --git a/hotspot/src/share/vm/opto/classes.hpp b/hotspot/src/share/vm/opto/classes.hpp
index 02c62f6..1316581 100644
--- a/hotspot/src/share/vm/opto/classes.hpp
+++ b/hotspot/src/share/vm/opto/classes.hpp
@@ -83,6 +83,12 @@
 macro(CompareAndSwapL)
 macro(CompareAndSwapP)
 macro(CompareAndSwapN)
+macro(GetAndAddI)
+macro(GetAndAddL)
+macro(GetAndSetI)
+macro(GetAndSetL)
+macro(GetAndSetP)
+macro(GetAndSetN)
 macro(Con)
 macro(ConN)
 macro(ConD)
diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp
index fd0a89e..0fe860c 100644
--- a/hotspot/src/share/vm/opto/compile.cpp
+++ b/hotspot/src/share/vm/opto/compile.cpp
@@ -2276,6 +2276,12 @@
   case Op_CompareAndSwapL:
   case Op_CompareAndSwapP:
   case Op_CompareAndSwapN:
+  case Op_GetAndAddI:
+  case Op_GetAndAddL:
+  case Op_GetAndSetI:
+  case Op_GetAndSetL:
+  case Op_GetAndSetP:
+  case Op_GetAndSetN:
   case Op_StoreP:
   case Op_StoreN:
   case Op_LoadB:
diff --git a/hotspot/src/share/vm/opto/connode.cpp b/hotspot/src/share/vm/opto/connode.cpp
index e038ccb..ee7ed64 100644
--- a/hotspot/src/share/vm/opto/connode.cpp
+++ b/hotspot/src/share/vm/opto/connode.cpp
@@ -480,7 +480,9 @@
         opc == Op_CheckCastPP ||
         opc == Op_StorePConditional ||
         opc == Op_CompareAndSwapP ||
-        opc == Op_CompareAndSwapN;
+        opc == Op_CompareAndSwapN ||
+        opc == Op_GetAndSetP ||
+        opc == Op_GetAndSetN;
   }
   return possible_alias;
 }
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
index 49206f7..85d40b0 100644
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@@ -282,6 +282,26 @@
   return has_non_escaping_obj;
 }
 
+// Utility function for nodes that load an object
+void ConnectionGraph::add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) {
+  // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
+  // ThreadLocal has RawPtr type.
+  const Type* t = _igvn->type(n);
+  if (t->make_ptr() != NULL) {
+    Node* adr = n->in(MemNode::Address);
+#ifdef ASSERT
+    if (!adr->is_AddP()) {
+      assert(_igvn->type(adr)->isa_rawptr(), "sanity");
+    } else {
+      assert((ptnode_adr(adr->_idx) == NULL ||
+              ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity");
+    }
+#endif
+    add_local_var_and_edge(n, PointsToNode::NoEscape,
+                           adr, delayed_worklist);
+  }
+}
+
 // Populate Connection Graph with PointsTo nodes and create simple
 // connection graph edges.
 void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) {
@@ -387,22 +407,7 @@
     case Op_LoadP:
     case Op_LoadN:
     case Op_LoadPLocked: {
-      // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
-      // ThreadLocal has RawPrt type.
-      const Type* t = igvn->type(n);
-      if (t->make_ptr() != NULL) {
-        Node* adr = n->in(MemNode::Address);
-#ifdef ASSERT
-        if (!adr->is_AddP()) {
-          assert(igvn->type(adr)->isa_rawptr(), "sanity");
-        } else {
-          assert((ptnode_adr(adr->_idx) == NULL ||
-                  ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity");
-        }
-#endif
-        add_local_var_and_edge(n, PointsToNode::NoEscape,
-                               adr, delayed_worklist);
-      }
+      add_objload_to_connection_graph(n, delayed_worklist);
       break;
     }
     case Op_Parm: {
@@ -417,7 +422,7 @@
     }
     case Op_Phi: {
       // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
-      // ThreadLocal has RawPrt type.
+      // ThreadLocal has RawPtr type.
       const Type* t = n->as_Phi()->type();
       if (t->make_ptr() != NULL) {
         add_local_var(n, PointsToNode::NoEscape);
@@ -446,6 +451,11 @@
       }
       break;
     }
+    case Op_GetAndSetP:
+    case Op_GetAndSetN: {
+      add_objload_to_connection_graph(n, delayed_worklist);
+      // fallthrough
+    }
     case Op_StoreP:
     case Op_StoreN:
     case Op_StorePConditional:
@@ -585,7 +595,7 @@
     case Op_LoadN:
     case Op_LoadPLocked: {
       // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
-      // ThreadLocal has RawPrt type.
+      // ThreadLocal has RawPtr type.
       const Type* t = _igvn->type(n);
       if (t->make_ptr() != NULL) {
         Node* adr = n->in(MemNode::Address);
@@ -596,7 +606,7 @@
     }
     case Op_Phi: {
       // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
-      // ThreadLocal has RawPrt type.
+      // ThreadLocal has RawPtr type.
       const Type* t = n->as_Phi()->type();
       if (t->make_ptr() != NULL) {
         for (uint i = 1; i < n->req(); i++) {
@@ -638,8 +648,16 @@
     case Op_StoreN:
     case Op_StorePConditional:
     case Op_CompareAndSwapP:
-    case Op_CompareAndSwapN: {
+    case Op_CompareAndSwapN:
+    case Op_GetAndSetP:
+    case Op_GetAndSetN: {
       Node* adr = n->in(MemNode::Address);
+      if (opcode == Op_GetAndSetP || opcode == Op_GetAndSetN) {
+        const Type* t = _igvn->type(n);
+        if (t->make_ptr() != NULL) {
+          add_local_var_and_edge(n, PointsToNode::NoEscape, adr, NULL);
+        }
+      }
       const Type *adr_type = _igvn->type(adr);
       adr_type = adr_type->make_ptr();
       if (adr_type->isa_oopptr() ||
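
For escape analysis, GetAndSetP/N is both an oop load (its result) and an
oop store (its ValueIn input), so it takes the load path via
add_objload_to_connection_graph() and then falls through into the store
handling above. A sketch of why both edges matter:

    static AtomicReference<Object> cache = new AtomicReference<>();
    Object o = new Object();
    Object prev = cache.getAndSet(o); // o escapes into cache (store side);
                                      // prev is a heap reference (load side)
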
diff --git a/hotspot/src/share/vm/opto/escape.hpp b/hotspot/src/share/vm/opto/escape.hpp
index 54e5f58..78447f0 100644
--- a/hotspot/src/share/vm/opto/escape.hpp
+++ b/hotspot/src/share/vm/opto/escape.hpp
@@ -371,6 +371,8 @@
     _nodes.at_put(n->_idx, ptn);
   }
 
+  // Utility function for nodes that load an object
+  void add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist);
   // Create PointsToNode node and add it to Connection Graph.
   void add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist);
 
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
index 832799a..a8b87f3 100644
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@@ -65,6 +65,8 @@
  private:
   LibraryIntrinsic* _intrinsic;   // the library intrinsic being called
 
+  const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false);
+
  public:
   LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic)
     : GraphKit(caller),
@@ -241,7 +243,8 @@
                                     Node* src,  Node* src_offset,
                                     Node* dest, Node* dest_offset,
                                     Node* copy_length, bool dest_uninitialized);
-  bool inline_unsafe_CAS(BasicType type);
+  typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
+  bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
   bool inline_numberOfLeadingZeros(vmIntrinsics::ID id);
@@ -290,6 +293,11 @@
     case vmIntrinsics::_compareTo:
     case vmIntrinsics::_equals:
     case vmIntrinsics::_equalsC:
+    case vmIntrinsics::_getAndAddInt:
+    case vmIntrinsics::_getAndAddLong:
+    case vmIntrinsics::_getAndSetInt:
+    case vmIntrinsics::_getAndSetLong:
+    case vmIntrinsics::_getAndSetObject:
       break;  // InlineNatives does not control String.compareTo
     case vmIntrinsics::_Reference_get:
       break;  // InlineNatives does not control Reference.get
@@ -369,6 +377,42 @@
    // across safepoint since GC can change its value.
     break;
 
+  case vmIntrinsics::_compareAndSwapObject:
+#ifdef _LP64
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL;
+#endif
+    break;
+
+  case vmIntrinsics::_compareAndSwapLong:
+    if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL;
+    break;
+
+  case vmIntrinsics::_getAndAddInt:
+    if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL;
+    break;
+
+  case vmIntrinsics::_getAndAddLong:
+    if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL;
+    break;
+
+  case vmIntrinsics::_getAndSetInt:
+    if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL;
+    break;
+
+  case vmIntrinsics::_getAndSetLong:
+    if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL;
+    break;
+
+  case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
+    if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL;
+    break;
+#else
+    if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
+    break;
+#endif
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -620,11 +664,11 @@
     return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
 
   case vmIntrinsics::_compareAndSwapObject:
-    return inline_unsafe_CAS(T_OBJECT);
+    return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
   case vmIntrinsics::_compareAndSwapInt:
-    return inline_unsafe_CAS(T_INT);
+    return inline_unsafe_load_store(T_INT, LS_cmpxchg);
   case vmIntrinsics::_compareAndSwapLong:
-    return inline_unsafe_CAS(T_LONG);
+    return inline_unsafe_load_store(T_LONG, LS_cmpxchg);
 
   case vmIntrinsics::_putOrderedObject:
     return inline_unsafe_ordered_store(T_OBJECT);
@@ -633,6 +677,17 @@
   case vmIntrinsics::_putOrderedLong:
     return inline_unsafe_ordered_store(T_LONG);
 
+  case vmIntrinsics::_getAndAddInt:
+    return inline_unsafe_load_store(T_INT, LS_xadd);
+  case vmIntrinsics::_getAndAddLong:
+    return inline_unsafe_load_store(T_LONG, LS_xadd);
+  case vmIntrinsics::_getAndSetInt:
+    return inline_unsafe_load_store(T_INT, LS_xchg);
+  case vmIntrinsics::_getAndSetLong:
+    return inline_unsafe_load_store(T_LONG, LS_xchg);
+  case vmIntrinsics::_getAndSetObject:
+    return inline_unsafe_load_store(T_OBJECT, LS_xchg);
+
   case vmIntrinsics::_currentThread:
     return inline_native_currentThread();
   case vmIntrinsics::_isInterrupted:
@@ -2301,6 +2356,43 @@
 // Interpret Unsafe.fieldOffset cookies correctly:
 extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
 
+const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
+  // Attempt to infer a sharper value type from the offset and base type.
+  ciKlass* sharpened_klass = NULL;
+
+  // See if it is an instance field, with an object type.
+  if (alias_type->field() != NULL) {
+    assert(!is_native_ptr, "native pointer op cannot use a java address");
+    if (alias_type->field()->type()->is_klass()) {
+      sharpened_klass = alias_type->field()->type()->as_klass();
+    }
+  }
+
+  // See if it is a narrow oop array.
+  if (adr_type->isa_aryptr()) {
+    if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) {
+      const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
+      if (elem_type != NULL) {
+        sharpened_klass = elem_type->klass();
+      }
+    }
+  }
+
+  if (sharpened_klass != NULL) {
+    const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);
+
+#ifndef PRODUCT
+    if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
+      tty->print("  from base type:  ");   adr_type->dump();
+      tty->print("  sharpened value: ");   tjp->dump();
+    }
+#endif
+    // Sharpen the value type.
+    return tjp;
+  }
+  return NULL;
+}
+
 bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
   if (callee()->is_static())  return false;  // caller must have the capability!
 
@@ -2430,39 +2522,9 @@
                            offset != top() && heap_base_oop != top();
 
   if (!is_store && type == T_OBJECT) {
-    // Attempt to infer a sharper value type from the offset and base type.
-    ciKlass* sharpened_klass = NULL;
-
-    // See if it is an instance field, with an object type.
-    if (alias_type->field() != NULL) {
-      assert(!is_native_ptr, "native pointer op cannot use a java address");
-      if (alias_type->field()->type()->is_klass()) {
-        sharpened_klass = alias_type->field()->type()->as_klass();
-      }
-    }
-
-    // See if it is a narrow oop array.
-    if (adr_type->isa_aryptr()) {
-      if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) {
-        const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
-        if (elem_type != NULL) {
-          sharpened_klass = elem_type->klass();
-        }
-      }
-    }
-
-    if (sharpened_klass != NULL) {
-      const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);
-
-      // Sharpen the value type.
+    const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type, is_native_ptr);
+    if (tjp != NULL) {
       value_type = tjp;
-
-#ifndef PRODUCT
-      if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
-        tty->print("  from base type:  ");   adr_type->dump();
-        tty->print("  sharpened value: "); value_type->dump();
-      }
-#endif
     }
   }
 
@@ -2673,9 +2735,9 @@
   return true;
 }
 
-//----------------------------inline_unsafe_CAS----------------------------
+//----------------------------inline_unsafe_load_store----------------------------
 
-bool LibraryCallKit::inline_unsafe_CAS(BasicType type) {
+bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) {
   // This basic scheme here is the same as inline_unsafe_access, but
   // differs in enough details that combining them would make the code
   // overly confusing.  (This is a true fact! I originally combined
@@ -2686,37 +2748,47 @@
   if (callee()->is_static())  return false;  // caller must have the capability!
 
 #ifndef PRODUCT
+  BasicType rtype;
   {
     ResourceMark rm;
-    // Check the signatures.
     ciSignature* sig = signature();
+    rtype = sig->return_type()->basic_type();
+    if (kind == LS_xadd || kind == LS_xchg) {
+      // Check the signatures.
 #ifdef ASSERT
-    BasicType rtype = sig->return_type()->basic_type();
-    assert(rtype == T_BOOLEAN, "CAS must return boolean");
-    assert(sig->count() == 4, "CAS has 4 arguments");
-    assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
-    assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
+      assert(rtype == type, "get and set must return the expected type");
+      assert(sig->count() == 3, "get and set has 3 arguments");
+      assert(sig->type_at(0)->basic_type() == T_OBJECT, "get and set base is object");
+      assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long");
+      assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta");
 #endif // ASSERT
+    } else if (kind == LS_cmpxchg) {
+      // Check the signatures.
+#ifdef ASSERT
+      assert(rtype == T_BOOLEAN, "CAS must return boolean");
+      assert(sig->count() == 4, "CAS has 4 arguments");
+      assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
+      assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
+#endif // ASSERT
+    } else {
+      ShouldNotReachHere();
+    }
   }
 #endif //PRODUCT
 
   // number of stack slots per value argument (1 or 2)
   int type_words = type2size[type];
 
-  // Cannot inline wide CAS on machines that don't support it natively
-  if (type2aelembytes(type) > BytesPerInt && !VM_Version::supports_cx8())
-    return false;
-
   C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
 
-  // Argument words:  "this" plus oop plus offset plus oldvalue plus newvalue;
-  int nargs = 1 + 1 + 2  + type_words + type_words;
+  // Argument words:  "this" plus oop plus offset (plus oldvalue) plus newvalue/delta;
+  int nargs = 1 + 1 + 2  + ((kind == LS_cmpxchg) ? type_words : 0) + type_words;
 
-  // pop arguments: newval, oldval, offset, base, and receiver
+  // pop arguments: newval, offset, base, and receiver
   debug_only(int saved_sp = _sp);
   _sp += nargs;
   Node* newval   = (type_words == 1) ? pop() : pop_pair();
-  Node* oldval   = (type_words == 1) ? pop() : pop_pair();
+  Node* oldval   = (kind == LS_cmpxchg) ? ((type_words == 1) ? pop() : pop_pair()) : NULL;
   Node *offset   = pop_pair();
   Node *base     = pop();
   Node *receiver = pop();
@@ -2740,16 +2812,24 @@
   Node* adr = make_unsafe_address(base, offset);
   const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
 
-  // (Unlike inline_unsafe_access, there seems no point in trying
-  // to refine types. Just use the coarse types here.
+  // For CAS, unlike inline_unsafe_access, there seems no point in
+  // trying to refine types. Just use the coarse types here.
   const Type *value_type = Type::get_const_basic_type(type);
   Compile::AliasType* alias_type = C->alias_type(adr_type);
   assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
+
+  if (kind == LS_xchg && type == T_OBJECT) {
+    const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
+    if (tjp != NULL) {
+      value_type = tjp;
+    }
+  }
+
   int alias_idx = C->get_alias_index(adr_type);
 
-  // Memory-model-wise, a CAS acts like a little synchronized block,
-  // so needs barriers on each side.  These don't translate into
-  // actual barriers on most machines, but we still need rest of
+  // Memory-model-wise, a LoadStore acts like a little synchronized
+  // block, so needs barriers on each side.  These don't translate
+  // into actual barriers on most machines, but we still need rest of
   // compiler to respect ordering.
 
   insert_mem_bar(Op_MemBarRelease);
@@ -2762,13 +2842,29 @@
 
   // For now, we handle only those cases that actually exist: ints,
   // longs, and Object. Adding others should be straightforward.
-  Node* cas;
+  Node* load_store;
   switch(type) {
   case T_INT:
-    cas = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval));
+    if (kind == LS_xadd) {
+      load_store = _gvn.transform(new (C, 4) GetAndAddINode(control(), mem, adr, newval, adr_type));
+    } else if (kind == LS_xchg) {
+      load_store = _gvn.transform(new (C, 4) GetAndSetINode(control(), mem, adr, newval, adr_type));
+    } else if (kind == LS_cmpxchg) {
+      load_store = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval));
+    } else {
+      ShouldNotReachHere();
+    }
     break;
   case T_LONG:
-    cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
+    if (kind == LS_xadd) {
+      load_store = _gvn.transform(new (C, 4) GetAndAddLNode(control(), mem, adr, newval, adr_type));
+    } else if (kind == LS_xchg) {
+      load_store = _gvn.transform(new (C, 4) GetAndSetLNode(control(), mem, adr, newval, adr_type));
+    } else if (kind == LS_cmpxchg) {
+      load_store = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
+    } else {
+      ShouldNotReachHere();
+    }
     break;
   case T_OBJECT:
     // Transformation of a value which could be NULL pointer (CastPP #NULL)
@@ -2778,7 +2874,6 @@
       newval = _gvn.makecon(TypePtr::NULL_PTR);
 
     // Reference stores need a store barrier.
-    // (They don't if CAS fails, but it isn't worth checking.)
     pre_barrier(true /* do_load*/,
                 control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
                 NULL /* pre_val*/,
@@ -2786,32 +2881,50 @@
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
-      Node *oldval_enc = _gvn.transform(new (C, 2) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
-      cas = _gvn.transform(new (C, 5) CompareAndSwapNNode(control(), mem, adr,
-                                                          newval_enc, oldval_enc));
+      if (kind == LS_xchg) {
+        load_store = _gvn.transform(new (C, 4) GetAndSetNNode(control(), mem, adr,
+                                                              newval_enc, adr_type, value_type->make_narrowoop()));
+      } else {
+        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
+        Node *oldval_enc = _gvn.transform(new (C, 2) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
+        load_store = _gvn.transform(new (C, 5) CompareAndSwapNNode(control(), mem, adr,
+                                                                   newval_enc, oldval_enc));
+      }
     } else
 #endif
     {
-      cas = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
+      if (kind == LS_xchg) {
+        load_store = _gvn.transform(new (C, 4) GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
+      } else {
+        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
+        load_store = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
+      }
     }
-    post_barrier(control(), cas, base, adr, alias_idx, newval, T_OBJECT, true);
+    post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
     break;
   default:
     ShouldNotReachHere();
     break;
   }
 
-  // SCMemProjNodes represent the memory state of CAS. Their main
-  // role is to prevent CAS nodes from being optimized away when their
-  // results aren't used.
-  Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
+  // SCMemProjNodes represent the memory state of a LoadStore. Their
+  // main role is to prevent LoadStore nodes from being optimized away
+  // when their results aren't used.
+  Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(load_store));
   set_memory(proj, alias_idx);
 
   // Add the trailing membar surrounding the access
   insert_mem_bar(Op_MemBarCPUOrder);
   insert_mem_bar(Op_MemBarAcquire);
 
-  push(cas);
+#ifdef _LP64
+  if (type == T_OBJECT && adr->bottom_type()->is_ptr_to_narrowoop() && kind == LS_xchg) {
+    load_store = _gvn.transform(new (C, 2) DecodeNNode(load_store, load_store->bottom_type()->make_ptr()));
+  }
+#endif
+
+  assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
+  push_node(load_store->bottom_type()->basic_type(), load_store);
   return true;
 }
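For orientation, the three LS_* kinds correspond to the standard hardware read-modify-write primitives. A sketch in terms of the GCC/Clang __atomic builtins, used purely as an illustration (HotSpot lowers its ideal nodes through the matcher rather than calling builtins):

// LS_xadd: atomic fetch-and-add, returns the old value
long xadd(long* p, long v) {
  return __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);
}
// LS_xchg: atomic exchange, returns the old value
long xchg(long* p, long v) {
  return __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
}
// LS_cmpxchg: compare-and-swap, returns success as a boolean
bool cmpxchg(long* p, long expected, long desired) {
  return __atomic_compare_exchange_n(p, &expected, desired, /*weak=*/false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}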
 
diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp
index bf4a345..015c0f0 100644
--- a/hotspot/src/share/vm/opto/matcher.cpp
+++ b/hotspot/src/share/vm/opto/matcher.cpp
@@ -2134,10 +2134,10 @@
       case Op_CompareAndSwapP:
       case Op_CompareAndSwapN: {   // Convert trinary to binary-tree
         Node *newval = n->in(MemNode::ValueIn );
-        Node *oldval  = n->in(LoadStoreNode::ExpectedIn);
+        Node *oldval  = n->in(LoadStoreConditionalNode::ExpectedIn);
         Node *pair = new (C, 3) BinaryNode( oldval, newval );
         n->set_req(MemNode::ValueIn,pair);
-        n->del_req(LoadStoreNode::ExpectedIn);
+        n->del_req(LoadStoreConditionalNode::ExpectedIn);
         break;
       }
       case Op_CMoveD:              // Convert trinary to binary-tree
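The rewrite above exists because the matcher's instruction-selection trees are binary: a node may expose at most two data inputs per match edge, so oldval and newval are folded into a single BinaryNode before matching. A sketch of the shape change:

// before:  CompareAndSwapX(ctl, mem, adr, newval, oldval)
// after:   CompareAndSwapX(ctl, mem, adr, Binary(oldval, newval))
//
// The new GetAndAdd*/GetAndSet* nodes carry no ExpectedIn edge, which is
// why they need no corresponding case here.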
diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp
index 1cbe317..cc2d424 100644
--- a/hotspot/src/share/vm/opto/memnode.cpp
+++ b/hotspot/src/share/vm/opto/memnode.cpp
@@ -2552,14 +2552,38 @@
 }
 
 //=============================================================================
-LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) {
+//----------------------------------LoadStoreNode------------------------------
+LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required )
+  : Node(required),
+    _type(rt),
+    _adr_type(at)
+{
   init_req(MemNode::Control, c  );
   init_req(MemNode::Memory , mem);
   init_req(MemNode::Address, adr);
   init_req(MemNode::ValueIn, val);
-  init_req(         ExpectedIn, ex );
   init_class_id(Class_LoadStore);
+}
 
+uint LoadStoreNode::ideal_reg() const {
+  return _type->ideal_reg();
+}
+
+bool LoadStoreNode::result_not_used() const {
+  for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
+    Node *x = fast_out(i);
+    if (x->Opcode() == Op_SCMemProj) continue;
+    return false;
+  }
+  return true;
+}
+
+uint LoadStoreNode::size_of() const { return sizeof(*this); }
+
+//=============================================================================
+//----------------------------------LoadStoreConditionalNode--------------------
+LoadStoreConditionalNode::LoadStoreConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : LoadStoreNode(c, mem, adr, val, NULL, TypeInt::BOOL, 5) {
+  init_req(ExpectedIn, ex );
 }
 
 //=============================================================================
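result_not_used() reports whether anything besides the SCMemProj consumes the loaded value. A backend can use it to select a cheaper instruction when the old value is dead, for example a plain locked add instead of an exchanging add. A hypothetical match-rule predicate (sketch only; the real rules live in the per-CPU .ad files, which are not part of this hunk):

// instruct xaddL_no_res(memory mem, rRegL add, rFlagsReg cr) %{
//   predicate(n->as_LoadStore()->result_not_used());
//   match(Set dummy (GetAndAddL mem add));
//   ...  // emit "lock add" -- no old value needs to be materialized
// %}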
diff --git a/hotspot/src/share/vm/opto/memnode.hpp b/hotspot/src/share/vm/opto/memnode.hpp
index f79f615..6f722f9 100644
--- a/hotspot/src/share/vm/opto/memnode.hpp
+++ b/hotspot/src/share/vm/opto/memnode.hpp
@@ -657,23 +657,36 @@
 //------------------------------LoadStoreNode---------------------------
 // Note: is_Mem() method returns 'true' for this class.
 class LoadStoreNode : public Node {
+private:
+  const Type* const _type;      // What kind of value is loaded?
+  const TypePtr* _adr_type;     // What kind of memory is being addressed?
+  virtual uint size_of() const; // Size is bigger
+public:
+  LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required );
+  virtual bool depends_only_on_test() const { return false; }
+  virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
+
+  virtual const Type *bottom_type() const { return _type; }
+  virtual uint ideal_reg() const;
+  virtual const class TypePtr *adr_type() const { return _adr_type; }  // returns bottom_type of address
+
+  bool result_not_used() const;
+};
+
+class LoadStoreConditionalNode : public LoadStoreNode {
 public:
   enum {
     ExpectedIn = MemNode::ValueIn+1 // One more input than MemNode
   };
-  LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex);
-  virtual bool depends_only_on_test() const { return false; }
-  virtual const Type *bottom_type() const { return TypeInt::BOOL; }
-  virtual uint ideal_reg() const { return Op_RegI; }
-  virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
+  LoadStoreConditionalNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex);
 };
 
 //------------------------------StorePConditionalNode---------------------------
 // Conditionally store pointer to memory, if no change since prior
 // load-locked.  Sets flags for success or failure of the store.
-class StorePConditionalNode : public LoadStoreNode {
+class StorePConditionalNode : public LoadStoreConditionalNode {
 public:
-  StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+  StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreConditionalNode(c, mem, adr, val, ll) { }
   virtual int Opcode() const;
   // Produces flags
   virtual uint ideal_reg() const { return Op_RegFlags; }
@@ -682,9 +695,9 @@
 //------------------------------StoreIConditionalNode---------------------------
 // Conditionally store int to memory, if no change since prior
 // load-locked.  Sets flags for success or failure of the store.
-class StoreIConditionalNode : public LoadStoreNode {
+class StoreIConditionalNode : public LoadStoreConditionalNode {
 public:
-  StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreNode(c, mem, adr, val, ii) { }
+  StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreConditionalNode(c, mem, adr, val, ii) { }
   virtual int Opcode() const;
   // Produces flags
   virtual uint ideal_reg() const { return Op_RegFlags; }
@@ -693,9 +706,9 @@
 //------------------------------StoreLConditionalNode---------------------------
 // Conditionally store long to memory, if no change since prior
 // load-locked.  Sets flags for success or failure of the store.
-class StoreLConditionalNode : public LoadStoreNode {
+class StoreLConditionalNode : public LoadStoreConditionalNode {
 public:
-  StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+  StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreConditionalNode(c, mem, adr, val, ll) { }
   virtual int Opcode() const;
   // Produces flags
   virtual uint ideal_reg() const { return Op_RegFlags; }
@@ -703,32 +716,75 @@
 
 
 //------------------------------CompareAndSwapLNode---------------------------
-class CompareAndSwapLNode : public LoadStoreNode {
+class CompareAndSwapLNode : public LoadStoreConditionalNode {
 public:
-  CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+  CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
   virtual int Opcode() const;
 };
 
 
 //------------------------------CompareAndSwapINode---------------------------
-class CompareAndSwapINode : public LoadStoreNode {
+class CompareAndSwapINode : public LoadStoreConditionalNode {
 public:
-  CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+  CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
   virtual int Opcode() const;
 };
 
 
 //------------------------------CompareAndSwapPNode---------------------------
-class CompareAndSwapPNode : public LoadStoreNode {
+class CompareAndSwapPNode : public LoadStoreConditionalNode {
 public:
-  CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+  CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
   virtual int Opcode() const;
 };
 
 //------------------------------CompareAndSwapNNode---------------------------
-class CompareAndSwapNNode : public LoadStoreNode {
+class CompareAndSwapNNode : public LoadStoreConditionalNode {
 public:
-  CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+  CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndAddINode---------------------------
+class GetAndAddINode : public LoadStoreNode {
+public:
+  GetAndAddINode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeInt::INT, 4) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndAddLNode---------------------------
+class GetAndAddLNode : public LoadStoreNode {
+public:
+  GetAndAddLNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeLong::LONG, 4) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndSetINode---------------------------
+class GetAndSetINode : public LoadStoreNode {
+public:
+  GetAndSetINode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeInt::INT, 4) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndSetLNode---------------------------
+class GetAndSetLNode : public LoadStoreNode {
+public:
+  GetAndSetLNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeLong::LONG, 4) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndSetPNode---------------------------
+class GetAndSetPNode : public LoadStoreNode {
+public:
+  GetAndSetPNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t ) : LoadStoreNode(c, mem, adr, val, at, t, 4) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------GetAndSetNNode---------------------------
+class GetAndSetNNode : public LoadStoreNode {
+public:
+  GetAndSetNNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t ) : LoadStoreNode(c, mem, adr, val, at, t, 4) { }
   virtual int Opcode() const;
 };
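Taken together, the header now describes this hierarchy (summary sketch):

// Node
//  +- LoadStoreNode                 -- returns the loaded value (_type)
//      +- GetAndAddINode, GetAndAddLNode      -- atomic add (xadd)
//      +- GetAndSetI/L/P/NNode                -- atomic exchange (xchg)
//      +- LoadStoreConditionalNode  -- returns TypeInt::BOOL, extra ExpectedIn
//          +- CompareAndSwapI/L/P/NNode
//          +- StoreI/L/PConditionalNode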
 
diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp
index 2d51b67..7bcd1d7 100644
--- a/hotspot/src/share/vm/runtime/vm_version.cpp
+++ b/hotspot/src/share/vm/runtime/vm_version.cpp
@@ -45,6 +45,10 @@
 const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release();
 const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string();
 bool Abstract_VM_Version::_supports_cx8 = false;
+bool Abstract_VM_Version::_supports_atomic_getset4 = false;
+bool Abstract_VM_Version::_supports_atomic_getset8 = false;
+bool Abstract_VM_Version::_supports_atomic_getadd4 = false;
+bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
 unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
 int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
 
diff --git a/hotspot/src/share/vm/runtime/vm_version.hpp b/hotspot/src/share/vm/runtime/vm_version.hpp
index 0d35522..6506103 100644
--- a/hotspot/src/share/vm/runtime/vm_version.hpp
+++ b/hotspot/src/share/vm/runtime/vm_version.hpp
@@ -37,6 +37,10 @@
   static const char*  _s_internal_vm_info_string;
   // These are set by machine-dependent initializations
   static bool         _supports_cx8;
+  static bool         _supports_atomic_getset4;
+  static bool         _supports_atomic_getset8;
+  static bool         _supports_atomic_getadd4;
+  static bool         _supports_atomic_getadd8;
   static unsigned int _logical_processors_per_package;
   static int          _vm_major_version;
   static int          _vm_minor_version;
@@ -75,6 +79,13 @@
 
   // does HW support an 8-byte compare-exchange operation?
   static bool supports_cx8()  {return _supports_cx8;}
+  // does HW support atomic get-and-set or atomic get-and-add?  Used
+  // to guide intrinsification decisions for Unsafe atomic ops
+  static bool supports_atomic_getset4()  {return _supports_atomic_getset4;}
+  static bool supports_atomic_getset8()  {return _supports_atomic_getset8;}
+  static bool supports_atomic_getadd4()  {return _supports_atomic_getadd4;}
+  static bool supports_atomic_getadd8()  {return _supports_atomic_getadd8;}
+
   static unsigned int logical_processors_per_package() {
     return _logical_processors_per_package;
   }
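These flags default to false and are flipped by the machine-dependent VM_Version initialization on CPUs that have the instructions. A hypothetical platform setup (sketch; the per-CPU vm_version files that actually set these are not shown in this patch):

void VM_Version::initialize() {
  // Assumed capabilities for a modern 64-bit x86-like CPU; the real
  // conditions live in the platform-specific vm_version files.
  _supports_cx8            = true;  // 8-byte compare-exchange
  _supports_atomic_getset4 = true;  // 32-bit atomic exchange
  _supports_atomic_getset8 = true;  // 64-bit atomic exchange
  _supports_atomic_getadd4 = true;  // 32-bit atomic fetch-and-add
  _supports_atomic_getadd8 = true;  // 64-bit atomic fetch-and-add
}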