Optimize arena_prof_tctx_set().

Optimize arena_prof_tctx_set() to avoid reading run metadata when
deciding whether it's actually necessary to write tctx; the decision is
now made from the caller-supplied usize and tctx instead.
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index f2954b3..76c5b93 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -555,7 +555,7 @@
 unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
 prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
-void	arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
     tcache_t *tcache);
 arena_t	*arena_aalloc(const void *ptr);
@@ -1092,7 +1092,7 @@
 }
 
 JEMALLOC_INLINE void
-arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 	arena_chunk_t *chunk;
 
@@ -1102,12 +1102,25 @@
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (likely(chunk != ptr)) {
 		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+
 		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 
-		if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) {
-			arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
-			    pageind);
+		if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx >
+		    (uintptr_t)1U)) {
+			arena_chunk_map_misc_t *elm;
+
+			assert(arena_mapbits_large_get(chunk, pageind) != 0);
+
+			elm = arena_miscelm_get(chunk, pageind);
 			atomic_write_p(&elm->prof_tctx_pun, tctx);
+		} else {
+			/*
+			 * tctx must always be initialized for large runs.
+			 * Assert that the surrounding conditional logic is
+			 * equivalent to checking whether ptr refers to a large
+			 * run.
+			 */
+			assert(arena_mapbits_large_get(chunk, pageind) == 0);
 		}
 	} else
 		huge_prof_tctx_set(ptr, tctx);
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 2e22711..fe89828 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -332,7 +332,7 @@
     prof_tdata_t **tdata_out);
 prof_tctx_t	*prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
 prof_tctx_t	*prof_tctx_get(const void *ptr);
-void	prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void	prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	prof_malloc_sample_object(const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
@@ -402,13 +402,13 @@
 }
 
 JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
+prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	arena_prof_tctx_set(ptr, tctx);
+	arena_prof_tctx_set(ptr, usize, tctx);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -473,7 +473,7 @@
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
 		prof_malloc_sample_object(ptr, usize, tctx);
 	else
-		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -503,7 +503,7 @@
 	if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
 		prof_malloc_sample_object(ptr, usize, tctx);
 	else
-		prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
+		prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_ALWAYS_INLINE void
diff --git a/src/prof.c b/src/prof.c
index a05792f..b79eba6 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -219,7 +219,7 @@
 prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx)
 {
 
-	prof_tctx_set(ptr, tctx);
+	prof_tctx_set(ptr, usize, tctx);
 
 	malloc_mutex_lock(tctx->tdata->lock);
 	tctx->cnts.curobjs++;
diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c
index 3af1964..da34d70 100644
--- a/test/unit/prof_reset.c
+++ b/test/unit/prof_reset.c
@@ -16,6 +16,27 @@
 	return (fd);
 }
 
+static size_t
+get_lg_prof_sample(void)
+{
+	size_t lg_prof_sample;
+	size_t sz = sizeof(size_t);
+
+	assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0,
+	    "Unexpected mallctl failure while reading profiling sample rate");
+	return (lg_prof_sample);
+}
+
+static void
+do_prof_reset(size_t lg_prof_sample)
+{
+	assert_d_eq(mallctl("prof.reset", NULL, NULL,
+	    &lg_prof_sample, sizeof(size_t)), 0,
+	    "Unexpected mallctl failure while resetting profile data");
+	assert_zu_eq(lg_prof_sample, get_lg_prof_sample(),
+	    "Expected profile sample rate change");
+}
+
 TEST_BEGIN(test_prof_reset_basic)
 {
 	size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next;
@@ -30,9 +51,7 @@
 	    "Unexpected mallctl failure while reading profiling sample rate");
 	assert_zu_eq(lg_prof_sample_orig, 0,
 	    "Unexpected profiling sample rate");
-	sz = sizeof(size_t);
-	assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure while reading profiling sample rate");
+	lg_prof_sample = get_lg_prof_sample();
 	assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
 	    "Unexpected disagreement between \"opt.lg_prof_sample\" and "
 	    "\"prof.lg_sample\"");
@@ -41,10 +60,7 @@
 	for (i = 0; i < 2; i++) {
 		assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
 		    "Unexpected mallctl failure while resetting profile data");
-		sz = sizeof(size_t);
-		assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz,
-		    NULL, 0), 0, "Unexpected mallctl failure while reading "
-		    "profiling sample rate");
+		lg_prof_sample = get_lg_prof_sample();
 		assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
 		    "Unexpected profile sample rate change");
 	}
@@ -52,22 +68,15 @@
 	/* Test resets with prof.lg_sample changes. */
 	lg_prof_sample_next = 1;
 	for (i = 0; i < 2; i++) {
-		assert_d_eq(mallctl("prof.reset", NULL, NULL,
-		    &lg_prof_sample_next, sizeof(size_t)), 0,
-		    "Unexpected mallctl failure while resetting profile data");
-		sz = sizeof(size_t);
-		assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz,
-		    NULL, 0), 0, "Unexpected mallctl failure while reading "
-		    "profiling sample rate");
+		do_prof_reset(lg_prof_sample_next);
+		lg_prof_sample = get_lg_prof_sample();
 		assert_zu_eq(lg_prof_sample, lg_prof_sample_next,
 		    "Expected profile sample rate change");
 		lg_prof_sample_next = lg_prof_sample_orig;
 	}
 
 	/* Make sure the test code restored prof.lg_sample. */
-	sz = sizeof(size_t);
-	assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure while reading profiling sample rate");
+	lg_prof_sample = get_lg_prof_sample();
 	assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
 	    "Unexpected disagreement between \"opt.lg_prof_sample\" and "
 	    "\"prof.lg_sample\"");
@@ -182,6 +191,7 @@
 
 TEST_BEGIN(test_prof_reset)
 {
+	size_t lg_prof_sample_orig;
 	bool active;
 	thd_t thds[NTHREADS];
 	unsigned thd_args[NTHREADS];
@@ -195,6 +205,9 @@
 	    "Unexpected pre-existing tdata structures");
 	tdata_count = prof_tdata_count();
 
+	lg_prof_sample_orig = get_lg_prof_sample();
+	do_prof_reset(5);
+
 	active = true;
 	assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
 	    0, "Unexpected mallctl failure while activating profiling");
@@ -214,6 +227,8 @@
 	active = false;
 	assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
 	    0, "Unexpected mallctl failure while deactivating profiling");
+
+	do_prof_reset(lg_prof_sample_orig);
 }
 TEST_END
 #undef NTHREADS