Streamline tcache-related malloc/free fast paths.

tcache_get() is inlined, so do the config_tcache check inside
tcache_get() and simplify its callers.
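
A minimal sketch of the resulting prologue (the lazy-initialization
tail that follows TCACHE_GET() is elided; see the tcache.h hunk below
for the real thing):

    JEMALLOC_INLINE tcache_t *
    tcache_get(void)
    {

        /*
         * config_tcache is a compile-time constant, so with
         * --disable-tcache this folds to "return (NULL)" and the
         * callers' NULL checks become dead code.
         */
        if (config_tcache == false)
            return (NULL);
        if (config_lazy_lock && (isthreaded & opt_tcache) == false)
            return (NULL);
        else if (opt_tcache == false)
            return (NULL);

        return (TCACHE_GET());
    }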

Make arena_malloc() an inline function, since it is part of the malloc()
fast path.
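
Under --disable-tcache the compiler then sees tcache_get() return a
constant NULL through the inline chain, so arena_malloc() effectively
reduces to (a sketch; asserts omitted):

    JEMALLOC_INLINE void *
    arena_malloc(size_t size, bool zero)
    {

        /* Both tcache branches are dead code here. */
        if (size <= small_maxclass)
            return (arena_malloc_small(choose_arena(), size, zero));
        return (arena_malloc_large(choose_arena(), size, zero));
    }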

Remove conditional logic that caused build issues if --disable-tcache
was specified.
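
The old #ifdef JEMALLOC_TCACHE guards compiled tcache.h out entirely,
leaving tcache_get(), tcache_maxclass, etc. undeclared under
--disable-tcache, so any unguarded reference failed to compile. With
the declarations unconditional, call sites shaped like the new
arena_dalloc() large path build in both configurations; the tcache
branch is simply dead when config_tcache is false:

    tcache_t *tcache = tcache_get();  /* compile-time NULL if disabled */

    if (size <= tcache_maxclass && tcache != NULL)
        tcache_dalloc_large(tcache, ptr, size);
    else {
        malloc_mutex_lock(&arena->lock);
        arena_dalloc_large(arena, chunk, ptr);
        malloc_mutex_unlock(&arena->lock);
    }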
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 78ea269..b8de12b 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -446,7 +446,6 @@
     size_t binind, uint64_t prof_accumbytes);
 void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
 void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
-void	*arena_malloc(size_t size, bool zero);
 void	*arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
     size_t alignment, bool zero);
 size_t	arena_salloc(const void *ptr);
@@ -475,6 +474,7 @@
     const void *ptr);
 prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
 void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+void	*arena_malloc(size_t size, bool zero);
 void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 #endif
 
@@ -630,11 +630,33 @@
 		chunk->map[pageind-map_bias].prof_ctx = ctx;
 }
 
+JEMALLOC_INLINE void *
+arena_malloc(size_t size, bool zero)
+{
+	tcache_t *tcache = tcache_get();
+
+	assert(size != 0);
+	assert(QUANTUM_CEILING(size) <= arena_maxclass);
+
+	if (size <= small_maxclass) {
+		if (tcache != NULL)
+			return (tcache_alloc_small(tcache, size, zero));
+		else
+			return (arena_malloc_small(choose_arena(), size, zero));
+	} else {
+		if (tcache != NULL && size <= tcache_maxclass)
+			return (tcache_alloc_large(tcache, size, zero));
+		else
+			return (arena_malloc_large(choose_arena(), size, zero));
+	}
+}
+
 JEMALLOC_INLINE void
 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 {
 	size_t pageind;
 	arena_chunk_map_t *mapelm;
+	tcache_t *tcache = tcache_get();
 
 	assert(arena != NULL);
 	assert(chunk->arena == arena);
@@ -646,9 +668,7 @@
 	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
 	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
 		/* Small allocation. */
-		tcache_t *tcache;
-
-		if (config_tcache && (tcache = tcache_get()) != NULL)
+		if (tcache != NULL)
 			tcache_dalloc_small(tcache, ptr);
 		else {
 			arena_run_t *run;
@@ -671,27 +691,13 @@
 			malloc_mutex_unlock(&bin->lock);
 		}
 	} else {
-		if (config_tcache) {
-			size_t size = mapelm->bits & ~PAGE_MASK;
+		size_t size = mapelm->bits & ~PAGE_MASK;
 
-			assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-			if (size <= tcache_maxclass) {
-				tcache_t *tcache;
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
 
-				if ((tcache = tcache_get()) != NULL)
-					tcache_dalloc_large(tcache, ptr, size);
-				else {
-					malloc_mutex_lock(&arena->lock);
-					arena_dalloc_large(arena, chunk, ptr);
-					malloc_mutex_unlock(&arena->lock);
-				}
-			} else {
-				malloc_mutex_lock(&arena->lock);
-				arena_dalloc_large(arena, chunk, ptr);
-				malloc_mutex_unlock(&arena->lock);
-			}
+		if (size <= tcache_maxclass && tcache != NULL) {
+			tcache_dalloc_large(tcache, ptr, size);
 		} else {
-			assert(((uintptr_t)ptr & PAGE_MASK) == 0);
 			malloc_mutex_lock(&arena->lock);
 			arena_dalloc_large(arena, chunk, ptr);
 			malloc_mutex_unlock(&arena->lock);
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 83e03d9..717682d 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -1,4 +1,3 @@
-#ifdef JEMALLOC_TCACHE
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
@@ -134,7 +133,11 @@
 {
 	tcache_t *tcache;
 
-	if ((isthreaded & opt_tcache) == false)
+	if (config_tcache == false)
+		return (NULL);
+	if (config_lazy_lock && (isthreaded & opt_tcache) == false)
+		return (NULL);
+	else if (opt_tcache == false)
 		return (NULL);
 
 	tcache = TCACHE_GET();
@@ -391,4 +394,3 @@
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
-#endif /* JEMALLOC_TCACHE */
diff --git a/src/arena.c b/src/arena.c
index c2632d9..8a158df 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1455,35 +1455,6 @@
 	return (ret);
 }
 
-void *
-arena_malloc(size_t size, bool zero)
-{
-
-	assert(size != 0);
-	assert(QUANTUM_CEILING(size) <= arena_maxclass);
-
-	if (size <= small_maxclass) {
-		tcache_t *tcache;
-
-		if (config_tcache && (tcache = tcache_get()) != NULL)
-			return (tcache_alloc_small(tcache, size, zero));
-		else
-			return (arena_malloc_small(choose_arena(), size, zero));
-	} else {
-		if (config_tcache && size <= tcache_maxclass) {
-			tcache_t *tcache;
-
-			if ((tcache = tcache_get()) != NULL)
-				return (tcache_alloc_large(tcache, size, zero));
-			else {
-				return (arena_malloc_large(choose_arena(),
-				    size, zero));
-			}
-		} else
-			return (arena_malloc_large(choose_arena(), size, zero));
-	}
-}
-
 /* Only handles large allocations that require more than page alignment. */
 void *
 arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
diff --git a/src/tcache.c b/src/tcache.c
index 398fc0a..4f4ed6c 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -1,6 +1,6 @@
 #define	JEMALLOC_TCACHE_C_
 #include "jemalloc/internal/jemalloc_internal.h"
-#ifdef JEMALLOC_TCACHE
+
 /******************************************************************************/
 /* Data. */
 
@@ -436,5 +436,3 @@
 
 	return (false);
 }
-/******************************************************************************/
-#endif /* JEMALLOC_TCACHE */