diff --git a/include/dwarf.h b/include/dwarf.h
index 6f2fab8..5360ee3 100644
--- a/include/dwarf.h
+++ b/include/dwarf.h
@@ -231,6 +231,10 @@
   {
     struct dwarf_reg_state *next;	/* for rs_stack */
     dwarf_save_loc_t reg[DWARF_NUM_PRESERVED_REGS + 2];
+    unw_word_t ip;		          /* ip this rs is for */
+    unsigned short lru_chain;	  /* used for least-recently-used chain */
+    unsigned short coll_chain;	/* used for hash collisions */
+    unsigned short hint;	      /* hint for next rs to try (or -1) */
   }
 dwarf_reg_state_t;
 
@@ -280,9 +284,39 @@
     unsigned int pi_valid :1;	/* is proc_info valid? */
     unsigned int pi_is_dynamic :1; /* proc_info found via dynamic proc info? */
     unw_proc_info_t pi;		/* info about current procedure */
+
+    short hint; /* faster lookup of the rs cache */
+    short prev_rs;
   }
 dwarf_cursor_t;
 
+#define DWARF_LOG_UNW_CACHE_SIZE	7
+#define DWARF_UNW_CACHE_SIZE	(1 << DWARF_LOG_UNW_CACHE_SIZE)
+
+#define DWARF_LOG_UNW_HASH_SIZE	(DWARF_LOG_UNW_CACHE_SIZE + 1)
+#define DWARF_UNW_HASH_SIZE	(1 << DWARF_LOG_UNW_HASH_SIZE)
+
+typedef unsigned char unw_hash_index_t;
+
+struct dwarf_rs_cache
+  {
+#ifdef HAVE_ATOMIC_OPS_H
+    AO_TS_t busy;		/* is the rs-cache busy? */
+#else
+    pthread_mutex_t lock;
+#endif
+    unsigned short lru_head;	/* index of lead-recently used rs */
+    unsigned short lru_tail;	/* index of most-recently used rs */
+
+    /* hash table that maps instruction pointer to rs index: */
+    unsigned short hash[DWARF_UNW_HASH_SIZE];
+
+    uint32_t generation;	/* generation number */
+
+    /* rs cache: */
+    dwarf_reg_state_t buckets[DWARF_UNW_CACHE_SIZE];
+  };
+
 /* Convenience macros: */
 #define dwarf_init			UNW_ARCH_OBJ (dwarf_init)
 #define dwarf_find_proc_info		UNW_OBJ (dwarf_find_proc_info)
diff --git a/include/tdep-x86_64/libunwind_i.h b/include/tdep-x86_64/libunwind_i.h
index 0f841da..59ac2c0 100644
--- a/include/tdep-x86_64/libunwind_i.h
+++ b/include/tdep-x86_64/libunwind_i.h
@@ -48,6 +48,7 @@
 #endif
     unw_word_t dyn_generation;		/* see dyn-common.h */
     unw_word_t dyn_info_list_addr;	/* (cached) dyn_info_list_addr */
+    struct dwarf_rs_cache global_cache;
    };
 
 struct cursor
diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
index b6f00b8..e2b3875 100644
--- a/src/dwarf/Gparser.c
+++ b/src/dwarf/Gparser.c
@@ -23,6 +23,7 @@
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 
+#include <stddef.h>
 #include "dwarf_i.h"
 #include "libunwind_i.h"
 
@@ -452,6 +453,138 @@
   return 0;
 }
 
+static inline void
+flush_rs_cache (struct dwarf_rs_cache *cache)
+{
+  int i;
+
+  cache->lru_head = DWARF_UNW_CACHE_SIZE - 1;
+  cache->lru_tail = 0;
+
+  for (i = 0; i < DWARF_UNW_CACHE_SIZE; ++i)
+    {
+      if (i > 0)
+	cache->buckets[i].lru_chain = (i - 1);
+      cache->buckets[i].coll_chain = -1;
+      cache->buckets[i].ip = 0;
+    }
+  for (i = 0; i<DWARF_UNW_HASH_SIZE; ++i)
+    cache->hash[i] = -1;
+}
+
+static struct dwarf_rs_cache *
+get_rs_cache (unw_addr_space_t as)
+{
+  struct dwarf_rs_cache *cache = &as->global_cache;
+  if (atomic_read (&as->cache_generation) != atomic_read (&cache->generation))
+    {
+      flush_rs_cache (cache);
+      cache->generation = as->cache_generation;
+    }
+
+  return cache;
+}
+
+static inline unw_hash_index_t
+hash (unw_word_t ip)
+{
+  /* based on (sqrt(5)/2-1)*2^64 */
+# define magic	((unw_word_t) 0x9e3779b97f4a7c16ULL)
+
+  return (ip >> 4) * magic >> (64 - DWARF_LOG_UNW_HASH_SIZE);
+}
+
+static inline long
+cache_match (dwarf_reg_state_t *rs, unw_word_t ip)
+{
+  if (ip == rs->ip)
+    return 1;
+  return 0;
+}
+
+static dwarf_reg_state_t *
+rs_lookup (struct dwarf_rs_cache *cache, struct dwarf_cursor *c)
+{
+  dwarf_reg_state_t *rs = cache->buckets + c->hint;
+  unsigned short index;
+  unw_word_t ip;
+
+  ip = c->ip;
+
+  if (cache_match (rs, ip))
+    return rs;
+
+  index = cache->hash[hash (ip)];
+  if (index >= DWARF_UNW_CACHE_SIZE)
+    return 0;
+
+  rs = cache->buckets + index;
+  while (1)
+    {
+      if (cache_match (rs, ip))
+        {
+          /* update hint; no locking needed: single-word writes are atomic */
+          c->hint = cache->buckets[c->prev_rs].hint =
+            (rs - cache->buckets);
+          return rs;
+        }
+      if (rs->coll_chain >= DWARF_UNW_HASH_SIZE)
+        return 0;
+      rs = cache->buckets + rs->coll_chain;
+    }
+}
+
+static inline dwarf_reg_state_t *
+rs_new (struct dwarf_rs_cache *cache, unw_word_t ip)
+{
+  dwarf_reg_state_t *rs, *prev, *tmp;
+  unw_hash_index_t index;
+  unsigned short head;
+
+  head = cache->lru_head;
+  rs = cache->buckets + head;
+  cache->lru_head = rs->lru_chain;
+
+  /* re-insert rs at the tail of the LRU chain: */
+  cache->buckets[cache->lru_tail].lru_chain = head;
+  cache->lru_tail = head;
+
+  /* remove the old rs from the hash table (if it's there): */
+  if (rs->ip)
+    {
+      index = hash (rs->ip);
+      tmp = cache->buckets + cache->hash[index];
+      prev = 0;
+      while (1)
+	{
+	  if (tmp == rs)
+	    {
+	      if (prev)
+		prev->coll_chain = tmp->coll_chain;
+	      else
+		cache->hash[index] = tmp->coll_chain;
+	      break;
+	    }
+	  else
+	    prev = tmp;
+	  if (tmp->coll_chain >= DWARF_UNW_CACHE_SIZE)
+	    /* old rs wasn't in the hash-table */
+	    break;
+	  tmp = cache->buckets + tmp->coll_chain;
+	}
+    }
+
+  /* enter new rs in the hash table */
+  index = hash (ip);
+  rs->coll_chain = cache->hash[index];
+  cache->hash[index] = rs - cache->buckets;
+
+  rs->hint = 0;
+  rs->ip = ip;
+
+  return rs;
+}
+
 static int
 create_state_record_for (struct dwarf_cursor *c, dwarf_state_record_t *sr,
 			 unw_word_t ip)
@@ -509,13 +642,17 @@
 static int
 apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
 {
-  unw_word_t regnum, addr, cfa;
+  unw_word_t regnum, addr, cfa, ip;
+  unw_word_t prev_ip, prev_cfa;
   unw_addr_space_t as;
   dwarf_loc_t cfa_loc;
   unw_accessors_t *a;
   int i, ret;
   void *arg;
 
+  prev_ip = c->ip;
+  prev_cfa = c->cfa;
+
   as = c->as;
   arg = c->as_arg;
   a = unw_get_accessors (as);
@@ -583,11 +720,23 @@
 	}
     }
   c->cfa = cfa;
+  ret = dwarf_get (c, c->loc[c->ret_addr_column], &ip);
+  if (ret < 0)
+    return ret;
+  c->ip = ip;
+  /* XXX: check for ip to be code_aligned */
+
+  if (c->ip == prev_ip && c->cfa == prev_cfa)
+    {
+      dprintf ("%s: ip and cfa unchanged; stopping here (ip=0x%lx)\n",
+	       __FUNCTION__, (long) c->ip);
+      return -UNW_EBADFRAME;
+    }
   return 0;
 }
 
-HIDDEN int
-dwarf_find_save_locs (struct dwarf_cursor *c)
+static int
+uncached_dwarf_find_save_locs (struct dwarf_cursor *c)
 {
   dwarf_state_record_t sr;
   int ret;
@@ -605,6 +754,54 @@
   return 0;
 }
 
+HIDDEN int
+dwarf_find_save_locs (struct dwarf_cursor *c)
+{
+  dwarf_state_record_t sr;
+  dwarf_reg_state_t *rs, *rs1;
+  struct dwarf_rs_cache *cache;
+  int ret;
+
+  if (c->as->caching_policy == UNW_CACHE_NONE)
+    return uncached_dwarf_find_save_locs (c);
+
+  cache = get_rs_cache(c->as);
+  rs = rs_lookup(cache, c);
+
+  if (rs)
+    goto apply;
+
+  if ((ret = fetch_proc_info (c, c->ip, 1)) < 0)
+    return ret;
+
+  if ((ret = create_state_record_for (c, &sr, c->ip)) < 0)
+    return ret;
+
+  rs1 = &sr.rs_current;
+  if (rs1)
+    {
+      rs = rs_new (cache, c->ip);
+      memcpy(rs, rs1, offsetof(struct dwarf_reg_state, ip));
+      if (!rs)
+        {
+          dprintf ("%s: failed to create unwind rs\n", __FUNCTION__);
+          ret = -UNW_EUNSPEC;
+          return ret;
+        }
+    }
+  cache->buckets[c->prev_rs].hint = rs - cache->buckets;
+
+  c->hint = rs->hint;
+  c->prev_rs = rs - cache->buckets;
+
+apply:
+  if ((ret = apply_reg_state (c, rs)) < 0)
+    return ret;
+
+  put_unwind_info (c, &c->pi);
+  return 0;
+}
+
 /* The proc-info must be valid for IP before this routine can be
    called.  */
 HIDDEN int
diff --git a/src/dwarf/Gstep.c b/src/dwarf/Gstep.c
index 739bca2..a9b789c 100644
--- a/src/dwarf/Gstep.c
+++ b/src/dwarf/Gstep.c
@@ -26,107 +26,16 @@
 #include "dwarf.h"
 #include "libunwind_i.h"
 
-static int
-update_frame_state (struct dwarf_cursor *c, unw_word_t prev_cfa)
-{
-  unw_word_t prev_ip, ip;
-  int ret;
-
-  prev_ip = c->ip;
-
-  /* Update the IP cache (do this first: if we reach the end of the
-     frame-chain, the rest of the info may not be valid/useful
-     anymore. */
-  ret = dwarf_get (c, c->loc[c->ret_addr_column], &ip);
-  if (ret < 0)
-    return ret;
-  c->ip = ip;
-
-#if 0
-  /* ??? fix me---perhaps move to where we have convenient access to
-     code_align? */
-  if ((ip & (c->code_align - 1)) != 0)
-    {
-      /* don't let obviously bad addresses pollute the cache */
-      Debug (1, "rejecting bad ip=0x%lx\n", (long) c->ip);
-      return -UNW_EINVALIDIP;
-    }
-#endif
-  if (ip == 0)
-    /* end of frame-chain reached */
-    return 0;
-
-#if 0
-  num_regs = 0;
-  if (unlikely (c->abi_marker))
-    {
-      c->last_abi_marker = c->abi_marker;
-      switch (c->abi_marker)
-	{
-	case ABI_MARKER_LINUX_SIGTRAMP:
-	case ABI_MARKER_OLD_LINUX_SIGTRAMP:
-	  c->as->abi = ABI_LINUX;
-	  if ((ret = linux_sigtramp (c, &num_regs)) < 0)
-	    return ret;
-	  break;
-
-	case ABI_MARKER_OLD_LINUX_INTERRUPT:
-	case ABI_MARKER_LINUX_INTERRUPT:
-	  c->as->abi = ABI_LINUX;
-	  if ((ret = linux_interrupt (c, &num_regs, c->abi_marker)) < 0)
-	    return ret;
-	  break;
-
-	case ABI_MARKER_HP_UX_SIGTRAMP:
-	  c->as->abi = ABI_HPUX;
-	  if ((ret = hpux_sigtramp (c, &num_regs)) < 0)
-	    return ret;
-	  break;
-
-	default:
-	  Debug (1, "unknown ABI marker: ABI=%u, context=%u\n",
-		 c->abi_marker >> 8, c->abi_marker & 0xff);
-	  return -UNW_EINVAL;
-	}
-      Debug (10, "sigcontext_addr=%lx (ret=%d)\n",
-	     (unsigned long) c->sigcontext_addr, ret);
-
-      c->sigcontext_off = c->sigcontext_addr - c->cfa;
-
-      /* update the IP cache: */
-      if ((ret = ia64_get (c, c->loc[IA64_REG_IP], &ip)) < 0)
- 	return ret;
-      c->ip = ip;
-      if (ip == 0)
-	/* end of frame-chain reached */
-	return 0;
-    }
-  else
-    num_regs = (c->cfm >> 7) & 0x7f;	/* size of locals */
-
-  c->abi_marker = 0;
-#endif
-
-  if (c->ip == prev_ip && c->cfa == prev_cfa)
-    {
-      dprintf ("%s: ip and cfa unchanged; stopping (ip=0x%lx cfa=0x%lx)\n",
-	       __FUNCTION__, (long) prev_ip, (long) prev_cfa);
-      return -UNW_EBADFRAME;
-    }
-
-  c->pi_valid = 0;
-  return 0;
-}
-
 HIDDEN int
 dwarf_step (struct dwarf_cursor *c)
 {
   unw_word_t prev_cfa = c->cfa;
   int ret;
 
-  if ((ret = dwarf_find_save_locs (c)) >= 0
-      && (ret = update_frame_state (c, prev_cfa)) >= 0)
+  if ((ret = dwarf_find_save_locs (c)) >= 0) {
+    c->pi_valid = 0;
     ret = (c->ip == 0) ? 0 : 1;
+  }
 
   Debug (15, "returning %d\n", ret);
   return ret;
diff --git a/src/x86_64/init.h b/src/x86_64/init.h
index 7787b34..ae108b2 100644
--- a/src/x86_64/init.h
+++ b/src/x86_64/init.h
@@ -63,8 +63,11 @@
   c->sigcontext_addr = 0;
 
   c->dwarf.args_size = 0;
-  c->dwarf.ret_addr_column = 0;
+  c->dwarf.ret_addr_column = RIP;
   c->dwarf.pi_valid = 0;
   c->dwarf.pi_is_dynamic = 0;
+  c->dwarf.hint = 0;
+  c->dwarf.prev_rs = 0;
+
   return 0;
 }
