r600g: reduce memory usage from range/block hash table.

This table covered a large range unnecessarily, reduce the address
range covered, use the fact that the bottom two bits aren't significant,
and remove unused fields from the range struct. It also drops the hash_size/shift in context in favour of a define, which should make doing the math
a bit less CPU intensive.

valgrind glxinfo
Before:
==320==     in use at exit: 419,754 bytes in 706 blocks
==320==   total heap usage: 3,691 allocs, 2,985 frees, 7,272,467 bytes allocated

After:
==967==     in use at exit: 419,754 bytes in 706 blocks
==967==   total heap usage: 3,552 allocs, 2,846 frees, 3,550,131 bytes allocated

Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 2ca122a..93c41c0 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -185,8 +185,17 @@
 /*
  * helpers
  */
-#define CTX_RANGE_ID(ctx, offset) (((offset) >> (ctx)->hash_shift) & 255)
-#define CTX_BLOCK_ID(ctx, offset) ((offset) & ((1 << (ctx)->hash_shift) - 1))
+
+/* each range covers 9 bits of dword space = 512 dwords = 2k bytes */
+/* there is a block entry for each register so 512 blocks */
+/* we have no registers to read/write below 0x8000 (0x2000 in dw space) */
+/* we use some fake offsets at 0x40000 to do evergreen sampler borders so take 0x42000 as a max bound*/
+#define RANGE_OFFSET_START 0x8000
+#define HASH_SHIFT 9
+#define NUM_RANGES (0x42000 - RANGE_OFFSET_START) / (4 << HASH_SHIFT) /* 128 << 9 = 64k */
+
+#define CTX_RANGE_ID(ctx, offset) ((((offset - RANGE_OFFSET_START) >> 2) >> HASH_SHIFT) & 255)
+#define CTX_BLOCK_ID(ctx, offset) (((offset - RANGE_OFFSET_START) >> 2) & ((1 << HASH_SHIFT) - 1))
 
 /*
  * radeon_bo.c