Clean up the manpage and conditionalize various portions according to how
jemalloc is configured.
Modify arena_malloc() API to avoid unnecessary choose_arena() calls. Remove
unnecessary code from choose_arena().
Enable lazy-lock by default, now that choose_arena() is both faster and out of
the critical path.
Implement objdir support in the build system.
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
new file mode 100644
index 0000000..1320ba9
--- /dev/null
+++ b/jemalloc/INSTALL
@@ -0,0 +1,172 @@
+Building and installing jemalloc can be as simple as typing the following while
+in the root directory of the source tree:
+
+ ./configure
+ make
+ make install
+
+=== Advanced configuration =====================================================
+
+The 'configure' script supports numerous options that allow control of which
+functionality is enabled, where jemalloc is installed, etc. Optionally, pass
+any of the following arguments (not a definitive list) to 'configure':
+
+--help
+ Print a definitive list of options.
+
+--prefix=<install-root-dir>
+ Set the base directory in which to install. For example:
+
+ ./configure --prefix=/usr/local
+
+ will cause files to be installed into /usr/local/include, /usr/local/lib,
+ and /usr/local/man.
+
+--with-rpath=<colon-separated-rpath>
+ Embed one or more library paths, so that libjemalloc can find the libraries it
+ is linked to. This works only on ELF-based systems.
+
+--enable-debug
+ Enable assertions and validation code. This incurs a substantial
+ performance hit, but is very useful during application development.
+
+--enable-stats
+ Enable statistics gathering functionality. Use the 'P' option to print
+ detailed allocation statistics at exit, and/or the 'U' option to print a
+ detailed allocation trace log.
+
+--disable-tiny
+ Disable tiny (sub-quantum-sized) object support. Technically it is not
+ legal for a malloc implementation to allocate objects with less than
+ quantum alignment (8 or 16 bytes, depending on architecture), but in
+ practice it never causes any problems if, for example, 4-byte allocations
+ are 4-byte-aligned.
+
+--disable-mag
+ Disable thread-specific caches for sub-page-sized objects. Objects are
+ cached and released in bulk using "magazines" -- a term coined by the
+ developers of Solaris's umem allocator.
+
+--disable-balance
+ Disable dynamic rebalancing of thread-->arena assignments.
+
+--enable-dss
+ Enable support for page allocation/deallocation via sbrk(2), in addition to
+ mmap(2).
+
+--enable-fill
+ Enable support for junk/zero filling of memory. Use the 'J' option to
+ control junk filling, or the 'Z' option to control zero filling.
+
+--enable-xmalloc
+ Enable support for optional immediate termination due to out-of-memory
+ errors, as is commonly implemented by "xmalloc" wrapper functions for malloc.
+ Use the 'X' option to control termination behavior.
+
+--enable-sysv
+ Enable support for System V semantics, wherein malloc(0) returns NULL
+ rather than a minimal allocation. Use the 'V' option to control System V
+ compatibility.
+
+--enable-dynamic-page-shift
+ Under most conditions, the system page size never changes (usually 4KiB or
+ 8KiB, depending on architecture and configuration), and unless this option
+ is enabled, jemalloc assumes that page size can safely be determined during
+ configuration and hard-coded. Enabling dynamic page size determination has
+ a measurable impact on performance, since the compiler is forced to load
+ the page size from memory rather than embedding immediate values.
+
+--disable-lazy-lock
+ Disable code that wraps pthread_create() to detect when an application
+ switches from single-threaded to multi-threaded mode, so that it can avoid
+ mutex locking/unlocking operations while in single-threaded mode. In
+ practice, this feature usually has little impact on performance unless
+ magazines are disabled.
+
+The following environment variables (not a definitive list) impact configure's
+behavior:
+
+CFLAGS="?"
+ Pass these flags to the compiler. You probably shouldn't define this unless
+ you know what you are doing. (Use EXTRA_CFLAGS instead.)
+
+EXTRA_CFLAGS="?"
+ Append these flags to CFLAGS. This makes it possible to add flags such as
+ -Werror, while allowing the configure script to determine what other flags
+ are appropriate for the specified configuration.
+
+ The configure script specifically checks whether an optimization flag (-O*)
+ is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
+ level if it finds that one has already been specified.
+
+CPPFLAGS="?"
+ Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
+ 'cpp' when 'configure' is looking for include files, so you must use
+ CPPFLAGS instead if you need to help 'configure' find header files.
+
+LD_LIBRARY_PATH="?"
+ 'ld' uses this colon-separated list to find libraries.
+
+LDFLAGS="?"
+ Pass these flags when linking.
+
+PATH="?"
+ 'configure' uses this to find programs.
+
+=== Advanced compilation =======================================================
+
+To run integrated regression tests, type:
+
+ make check
+
+To clean up build results to varying degrees, use the following make targets:
+
+ clean
+ distclean
+ relclean
+
+=== Advanced installation ======================================================
+
+Optionally, define make variables when invoking make, including (not
+exclusively):
+
+INCLUDEDIR="?"
+ Use this as the installation prefix for header files.
+
+LIBDIR="?"
+ Use this as the installation prefix for libraries.
+
+MANDIR="?"
+ Use this as the installation prefix for man pages.
+
+CC="?"
+ Use this to invoke the C compiler.
+
+CFLAGS="?"
+ Pass these flags to the compiler.
+
+CPPFLAGS="?"
+ Pass these flags to the C preprocessor.
+
+LDFLAGS="?"
+ Pass these flags when linking.
+
+PATH="?"
+ Use this to search for programs used during configuration and building.
+
+=== Development ================================================================
+
+If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
+script rather than 'configure'. This re-generates 'configure', enables
+configuration dependency rules, and enables re-generation of automatically
+generated source files.
+
+The build system supports using an object directory separate from the source
+tree. For example, you can create an 'obj' directory, and from within that
+directory, issue configuration and build commands:
+
+ autoconf
+ mkdir obj
+ cd obj
+ ../configure --enable-autogen
+ make
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index 1652ec9..e8d212e 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -11,10 +11,8 @@
CC := @CC@
# Configuration parameters.
-BINDIR := @BINDIR@
INCLUDEDIR := @INCLUDEDIR@
LIBDIR := @LIBDIR@
-DATADIR := @DATADIR@
MANDIR := @MANDIR@
# Build parameters.
@@ -34,20 +32,20 @@
REV := 0
# File lists.
-CHDRS := src/jemalloc.h
-CSRCS := src/jemalloc.c
-DSO := lib/libjemalloc.so.$(REV)
-MAN3 := doc/jemalloc.3
+CHDRS := @srcroot@src/jemalloc.h @objroot@src/jemalloc_defs.h
+CSRCS := @srcroot@src/jemalloc.c
+DSOS := @objroot@lib/libjemalloc.so.$(REV) @objroot@lib/libjemalloc.so
+MAN3 := @objroot@doc/jemalloc.3
.PHONY: all dist install check clean distclean relclean
# Default target.
-all: $(DSO)
+all: $(DSOS)
-src/%.o: src/%.c
+@objroot@src/%.o: @srcroot@src/%.c
$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $+
-$(DSO): $(CSRCS:%.c=%.o)
+$(DSOS): $(CSRCS:@srcroot@%.c=@objroot@%.o)
@mkdir -p $(@D)
gcc -shared -o $@ $+ $(LDFLAGS) $(LIBS)
ln -sf libjemalloc.so.$(REV) lib/libjemalloc.so
@@ -59,7 +57,10 @@
install -m 644 $$h $(INCLUDEDIR); \
done
install -d $(LIBDIR)
- install -m 755 $(DSO) $(LIBDIR)
+ @for s in $(DSOS); do \
+ echo "install -m 755 $$s $(LIBDIR)"; \
+ install -m 755 $$s $(LIBDIR); \
+done
install -d $(MANDIR)
@for m in $(MAN3); do \
echo "install -m 644 $$m $(MANDIR)/man3"; \
@@ -69,9 +70,9 @@
check:
clean:
- rm -f src/*.o
- rm -f lib/libjemalloc.so
- rm -f lib/libjemalloc.so.$(REV)
+ rm -f @objroot@src/*.o
+ rm -f @objroot@lib/libjemalloc.so
+ rm -f @objroot@lib/libjemalloc.so.$(REV)
distclean: clean
rm -f @objroot@config.log
diff --git a/jemalloc/README b/jemalloc/README
new file mode 100644
index 0000000..de3a0a8
--- /dev/null
+++ b/jemalloc/README
@@ -0,0 +1,4 @@
+jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
+
+The INSTALL file contains information on how to configure, build, and install
+jemalloc.
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index fa0c1bc..e0bf8f5 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -41,7 +41,7 @@
MANDIR=`eval echo $MANDIR`
AC_SUBST([MANDIR])
-cfgoutputs="Makefile"
+cfgoutputs="Makefile doc/jemalloc.3"
cfghdrs="src/jemalloc_defs.h"
dnl If CFLAGS isn't defined and using gcc, set CFLAGS to something reasonable.
@@ -219,6 +219,12 @@
AC_DEFINE([JEMALLOC_STATS], [ ])
fi
AC_SUBST([enable_stats])
+if test "x$enable_stats" = "x0" ; then
+ roff_stats=".\\\" "
+else
+ roff_stats=""
+fi
+AC_SUBST([roff_stats])
dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny],
@@ -235,6 +241,15 @@
AC_DEFINE([JEMALLOC_TINY], [ ])
fi
AC_SUBST([enable_tiny])
+if test "x$enable_tiny" = "x0" ; then
+ roff_tiny=".\\\" "
+ roff_no_tiny=""
+else
+ roff_tiny=""
+ roff_no_tiny=".\\\" "
+fi
+AC_SUBST([roff_tiny])
+AC_SUBST([roff_no_tiny])
dnl Enable magazines by default.
AC_ARG_ENABLE([mag],
@@ -251,6 +266,12 @@
AC_DEFINE([JEMALLOC_MAG], [ ])
fi
AC_SUBST([enable_mag])
+if test "x$enable_mag" = "x0" ; then
+ roff_mag=".\\\" "
+else
+ roff_mag=""
+fi
+AC_SUBST([roff_mag])
dnl Enable dynamic arena load balancing by default.
AC_ARG_ENABLE([balance],
@@ -267,6 +288,12 @@
AC_DEFINE([JEMALLOC_BALANCE], [ ])
fi
AC_SUBST([enable_balance])
+if test "x$enable_balance" = "x0" ; then
+ roff_balance=".\\\" "
+else
+ roff_balance=""
+fi
+AC_SUBST([roff_balance])
dnl Do not enable allocation from DSS by default.
AC_ARG_ENABLE([dss],
@@ -283,6 +310,12 @@
AC_DEFINE([JEMALLOC_DSS], [ ])
fi
AC_SUBST([enable_dss])
+if test "x$enable_dss" = "x0" ; then
+ roff_dss=".\\\" "
+else
+ roff_dss=""
+fi
+AC_SUBST([roff_dss])
dnl Do not support the junk/zero filling option by default.
AC_ARG_ENABLE([fill],
@@ -299,6 +332,12 @@
AC_DEFINE([JEMALLOC_FILL], [ ])
fi
AC_SUBST([enable_fill])
+if test "x$enable_fill" = "x0" ; then
+ roff_fill=".\\\" "
+else
+ roff_fill=""
+fi
+AC_SUBST([roff_fill])
dnl Do not support the xmalloc option by default.
AC_ARG_ENABLE([xmalloc],
@@ -315,6 +354,12 @@
AC_DEFINE([JEMALLOC_XMALLOC], [ ])
fi
AC_SUBST([enable_xmalloc])
+if test "x$enable_xmalloc" = "x0" ; then
+ roff_xmalloc=".\\\" "
+else
+ roff_xmalloc=""
+fi
+AC_SUBST([roff_xmalloc])
dnl Do not support the SYSV option by default.
AC_ARG_ENABLE([sysv],
@@ -331,6 +376,12 @@
AC_DEFINE([JEMALLOC_SYSV], [ ])
fi
AC_SUBST([enable_sysv])
+if test "x$enable_sysv" = "x0" ; then
+ roff_sysv=".\\\" "
+else
+ roff_sysv=""
+fi
+AC_SUBST([roff_sysv])
dnl Do not determine page shift at run time by default.
AC_ARG_ENABLE([dynamic_page_shift],
@@ -380,6 +431,7 @@
dnl jemalloc configuration.
dnl
jemalloc_version=`cat ${srcroot}VERSION`
+AC_DEFINE_UNQUOTED([JEMALLOC_VERSION], ["$jemalloc_version"])
AC_SUBST([jemalloc_version])
dnl ============================================================================
@@ -400,21 +452,24 @@
return 0;
]])],
- AC_MSG_RESULT([yes]),
+ AC_MSG_RESULT([yes])
+ roff_tls="",
AC_MSG_RESULT([no])
+ roff_tls=".\\\" "
AC_DEFINE_UNQUOTED([NO_TLS], [ ]))
+AC_SUBST([roff_tls])
-dnl Do not enable lazy locking by default.
+dnl Enable lazy locking by default.
AC_ARG_ENABLE([lazy_lock],
[AS_HELP_STRING([--enable-lazy-lock],
- [Enable lazy locking (avoid locking unless multiple threads)])],
+ [Disable lazy locking (always lock, even when single-threaded)])],
[if test "x$enable_lazy_lock" = "xno" ; then
enable_lazy_lock="0"
else
enable_lazy_lock="1"
fi
],
-[enable_lazy_lock="0"]
+[enable_lazy_lock="1"]
)
if test "x$enable_lazy_lock" = "x1" ; then
AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
diff --git a/jemalloc/doc/jemalloc.3 b/jemalloc/doc/jemalloc.3.in
similarity index 64%
rename from jemalloc/doc/jemalloc.3
rename to jemalloc/doc/jemalloc.3.in
index b26ec39..8d10b39 100644
--- a/jemalloc/doc/jemalloc.3
+++ b/jemalloc/doc/jemalloc.3.in
@@ -1,5 +1,5 @@
-.\" Copyright (c) 2006-2008 Jason Evans <jasone@canonware.com>.
.\" Copyright (c) 2009 Facebook, Inc. All rights reserved.
+.\" Copyright (c) 2006-2008 Jason Evans <jasone@canonware.com>.
.\" All rights reserved.
.\" Copyright (c) 1980, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@@ -42,7 +42,7 @@
.Nm malloc , calloc , posix_memalign , realloc , free , malloc_usable_size
.Nd general purpose memory allocation functions
.Sh LIBRARY
-.Lb libc
+.Lb libjemalloc
.Sh SYNOPSIS
.In stdlib.h
.Ft void *
@@ -55,22 +55,23 @@
.Fn realloc "void *ptr" "size_t size"
.Ft void
.Fn free "void *ptr"
+.In jemalloc.h
+.Ft size_t
+.Fn malloc_usable_size "const void *ptr"
.Ft const char *
.Va jemalloc_options ;
.Ft void
.Fo \*(lp*jemalloc_message\*(rp
.Fa "const char *p1" "const char *p2" "const char *p3" "const char *p4"
.Fc
-.In malloc_np.h
-.Ft size_t
-.Fn malloc_usable_size "const void *ptr"
.Sh DESCRIPTION
The
.Fn malloc
function allocates
.Fa size
bytes of uninitialized memory.
-The allocated space is suitably aligned (after possible pointer coercion)
+The allocated space is suitably aligned
+@roff_tiny@(after possible pointer coercion)
for storage of any type of object.
.Pp
The
@@ -187,31 +188,32 @@
The process will call
.Xr abort 3
in these cases.
-.It B
-Double/halve the per-arena lock contention threshold at which a thread is
-randomly re-assigned to an arena.
-This dynamic load balancing tends to push threads away from highly contended
-arenas, which avoids worst case contention scenarios in which threads
-disproportionately utilize arenas.
-However, due to the highly dynamic load that applications may place on the
-allocator, it is impossible for the allocator to know in advance how sensitive
-it should be to contention over arenas.
-Therefore, some applications may benefit from increasing or decreasing this
-threshold parameter.
-This option is not available for some configurations (non-PIC).
+@roff_balance@@roff_tls@.It B
+@roff_balance@@roff_tls@Double/halve the per-arena lock contention threshold at
+@roff_balance@@roff_tls@which a thread is randomly re-assigned to an arena.
+@roff_balance@@roff_tls@This dynamic load balancing tends to push threads away
+@roff_balance@@roff_tls@from highly contended arenas, which avoids worst case
+@roff_balance@@roff_tls@contention scenarios in which threads disproportionately
+@roff_balance@@roff_tls@utilize arenas.
+@roff_balance@@roff_tls@However, due to the highly dynamic load that
+@roff_balance@@roff_tls@applications may place on the allocator, it is
+@roff_balance@@roff_tls@impossible for the allocator to know in advance how
+@roff_balance@@roff_tls@sensitive it should be to contention over arenas.
+@roff_balance@@roff_tls@Therefore, some applications may benefit from increasing
+@roff_balance@@roff_tls@or decreasing this threshold parameter.
.It C
Double/halve the size of the maximum size class that is a multiple of the
cacheline size (64).
Above this size, subpage spacing (256 bytes) is used for size classes.
The default value is 512 bytes.
-.It D
-Use
-.Xr sbrk 2
-to acquire memory in the data storage segment (DSS).
-This option is enabled by default.
-See the
-.Dq M
-option for related information and interactions.
+@roff_dss@.It D
+@roff_dss@Use
+@roff_dss@.Xr sbrk 2
+@roff_dss@to acquire memory in the data storage segment (DSS).
+@roff_dss@This option is enabled by default.
+@roff_dss@See the
+@roff_dss@.Dq M
+@roff_dss@option for related information and interactions.
.It F
Double/halve the per-arena maximum number of dirty unused pages that are
allowed to accumulate before informing the kernel about at least half of those
@@ -222,46 +224,48 @@
The default is 512 pages per arena;
.Ev JEMALLOC_OPTIONS=10f
will prevent any dirty unused pages from accumulating.
-.It G
-When there are multiple threads, use thread-specific caching for objects that
-are smaller than one page.
-This option is enabled by default.
-Thread-specific caching allows many allocations to be satisfied without
-performing any thread synchronization, at the cost of increased memory use.
-See the
-.Dq R
-option for related tuning information.
-This option is not available for some configurations (non-PIC).
-.It J
-Each byte of new memory allocated by
-.Fn malloc
-or
-.Fn realloc
-will be initialized to 0xa5.
-All memory returned by
-.Fn free
-or
-.Fn realloc
-will be initialized to 0x5a.
-This is intended for debugging and will impact performance negatively.
+@roff_mag@@roff_tls@.It G
+@roff_mag@@roff_tls@When there are multiple threads, use thread-specific caching
+@roff_mag@@roff_tls@for objects that are smaller than one page.
+@roff_mag@@roff_tls@This option is enabled by default.
+@roff_mag@@roff_tls@Thread-specific caching allows many allocations to be
+@roff_mag@@roff_tls@satisfied without performing any thread synchronization, at
+@roff_mag@@roff_tls@the cost of increased memory use.
+@roff_mag@@roff_tls@See the
+@roff_mag@@roff_tls@.Dq R
+@roff_mag@@roff_tls@option for related tuning information.
+@roff_fill@.It J
+@roff_fill@Each byte of new memory allocated by
+@roff_fill@.Fn malloc
+@roff_fill@or
+@roff_fill@.Fn realloc
+@roff_fill@will be initialized to 0xa5.
+@roff_fill@All memory returned by
+@roff_fill@.Fn free
+@roff_fill@or
+@roff_fill@.Fn realloc
+@roff_fill@will be initialized to 0x5a.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
.It K
Double/halve the virtual memory chunk size.
The default chunk size is 1 MB.
-.It M
-Use
-.Xr mmap 2
-to acquire anonymously mapped memory.
-This option is enabled by default.
-If both the
-.Dq D
-and
-.Dq M
-options are enabled, the allocator prefers the DSS over anonymous mappings,
-but allocation only fails if memory cannot be acquired via either method.
-If neither option is enabled, then the
-.Dq M
-option is implicitly enabled in order to assure that there is a method for
-acquiring memory.
+@roff_dss@.It M
+@roff_dss@Use
+@roff_dss@.Xr mmap 2
+@roff_dss@to acquire anonymously mapped memory.
+@roff_dss@This option is enabled by default.
+@roff_dss@If both the
+@roff_dss@.Dq D
+@roff_dss@and
+@roff_dss@.Dq M
+@roff_dss@options are enabled, the allocator prefers the DSS over anonymous
+@roff_dss@mappings, but allocation only fails if memory cannot be acquired via
+@roff_dss@either method.
+@roff_dss@If neither option is enabled, then the
+@roff_dss@.Dq M
+@roff_dss@option is implicitly enabled in order to assure that there is a method
+@roff_dss@for acquiring memory.
.It N
Double/halve the number of arenas.
The default number of arenas is two times the number of CPUs, or one if there
@@ -279,88 +283,70 @@
quantum (8 or 16 bytes, depending on architecture).
Above this size, cacheline spacing is used for size classes.
The default value is 128 bytes.
-.It R
-Double/halve magazine size, which approximately doubles/halves the number of
-rounds in each magazine.
-Magazines are used by the thread-specific caching machinery to acquire and
-release objects in bulk.
-Increasing the magazine size decreases locking overhead, at the expense of
-increased memory usage.
-This option is not available for some configurations (non-PIC).
-.It U
-Generate
-.Dq utrace
-entries for
-.Xr ktrace 1 ,
-for all operations.
-Consult the source for details on this option.
-.It V
-Attempting to allocate zero bytes will return a
-.Dv NULL
-pointer instead of
-a valid pointer.
-(The default behavior is to make a minimal allocation and return a
-pointer to it.)
-This option is provided for System V compatibility.
-This option is incompatible with the
-.Dq X
-option.
-.It X
-Rather than return failure for any allocation function,
-display a diagnostic message on
-.Dv stderr
-and cause the program to drop
-core (using
-.Xr abort 3 ) .
-This option should be set at compile time by including the following in
-the source code:
-.Bd -literal -offset indent
-jemalloc_options = "X";
-.Ed
-.It Z
-Each byte of new memory allocated by
-.Fn malloc
-or
-.Fn realloc
-will be initialized to 0.
-Note that this initialization only happens once for each byte, so
-.Fn realloc
-calls do not zero memory that was previously allocated.
-This is intended for debugging and will impact performance negatively.
+@roff_mag@@roff_tls@.It R
+@roff_mag@@roff_tls@Double/halve magazine size, which approximately
+@roff_mag@@roff_tls@doubles/halves the number of rounds in each magazine.
+@roff_mag@@roff_tls@Magazines are used by the thread-specific caching machinery
+@roff_mag@@roff_tls@to acquire and release objects in bulk.
+@roff_mag@@roff_tls@Increasing the magazine size decreases locking overhead, at
+@roff_mag@@roff_tls@the expense of increased memory usage.
+@roff_stats@.It U
+@roff_stats@Generate a verbose trace log via
+@roff_stats@.Fn jemalloc_message
+@roff_stats@for all allocation operations.
+@roff_sysv@.It V
+@roff_sysv@Attempting to allocate zero bytes will return a
+@roff_sysv@.Dv NULL
+@roff_sysv@pointer instead of a valid pointer.
+@roff_sysv@(The default behavior is to make a minimal allocation and return a
+@roff_sysv@pointer to it.)
+@roff_sysv@This option is provided for System V compatibility.
+@roff_sysv@@roff_xmalloc@This option is incompatible with the
+@roff_sysv@@roff_xmalloc@.Dq X
+@roff_sysv@@roff_xmalloc@option.
+@roff_xmalloc@.It X
+@roff_xmalloc@Rather than return failure for any allocation function, display a
+@roff_xmalloc@diagnostic message on
+@roff_xmalloc@.Dv stderr
+@roff_xmalloc@and cause the program to drop core (using
+@roff_xmalloc@.Xr abort 3 ) .
+@roff_xmalloc@This option should be set at compile time by including the
+@roff_xmalloc@following in the source code:
+@roff_xmalloc@.Bd -literal -offset indent
+@roff_xmalloc@jemalloc_options = "X";
+@roff_xmalloc@.Ed
+@roff_fill@.It Z
+@roff_fill@Each byte of new memory allocated by
+@roff_fill@.Fn malloc
+@roff_fill@or
+@roff_fill@.Fn realloc
+@roff_fill@will be initialized to 0.
+@roff_fill@Note that this initialization only happens once for each byte, so
+@roff_fill@.Fn realloc
+@roff_fill@calls do not zero memory that was previously allocated.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
.El
.Pp
-The
-.Dq J
-and
-.Dq Z
-options are intended for testing and debugging.
-An application which changes its behavior when these options are used
-is flawed.
+@roff_fill@The
+@roff_fill@.Dq J
+@roff_fill@and
+@roff_fill@.Dq Z
+@roff_fill@options are intended for testing and debugging.
+@roff_fill@An application which changes its behavior when these options are used
+@roff_fill@is flawed.
.Sh IMPLEMENTATION NOTES
-Traditionally, allocators have used
-.Xr sbrk 2
-to obtain memory, which is suboptimal for several reasons, including race
-conditions, increased fragmentation, and artificial limitations on maximum
-usable memory.
-This allocator uses both
-.Xr sbrk 2
-and
-.Xr mmap 2
-by default, but it can be configured at run time to use only one or the other.
-If resource limits are not a primary concern, the preferred configuration is
-.Ev JEMALLOC_OPTIONS=dM
-or
-.Ev JEMALLOC_OPTIONS=DM .
-When so configured, the
-.Ar datasize
-resource limit has little practical effect for typical applications; use
-.Ev JEMALLOC_OPTIONS=Dm
-if that is a concern.
-Regardless of allocator configuration, the
-.Ar vmemoryuse
-resource limit can be used to bound the total virtual memory used by a
-process, as described in
-.Xr limits 1 .
+@roff_dss@Traditionally, allocators have used
+@roff_dss@.Xr sbrk 2
+@roff_dss@to obtain memory, which is suboptimal for several reasons, including
+@roff_dss@race conditions, increased fragmentation, and artificial limitations
+@roff_dss@on maximum usable memory.
+@roff_dss@This allocator uses both
+@roff_dss@.Xr sbrk 2
+@roff_dss@and
+@roff_dss@.Xr mmap 2
+@roff_dss@by default, but it can be configured at run time to use only one or
+@roff_dss@the other.
.Pp
This allocator uses multiple arenas in order to reduce lock contention for
threaded programs on multi-processor systems.
@@ -375,13 +361,14 @@
However, it may make sense to reduce the number of arenas if an application
does not make much use of the allocation functions.
.Pp
-In addition to multiple arenas, this allocator supports thread-specific
-caching for small objects (smaller than one page), in order to make it
-possible to completely avoid synchronization for most small allocation requests.
-Such caching allows very fast allocation in the common case, but it increases
-memory usage and fragmentation, since a bounded number of objects can remain
-allocated in each thread cache.
-.Pp
+@roff_mag@In addition to multiple arenas, this allocator supports
+@roff_mag@thread-specific caching for small objects (smaller than one page), in
+@roff_mag@order to make it possible to completely avoid synchronization for most
+@roff_mag@small allocation requests.
+@roff_mag@Such caching allows very fast allocation in the common case, but it
+@roff_mag@increases memory usage and fragmentation, since a bounded number of
+@roff_mag@objects can remain allocated in each thread cache.
+@roff_mag@.Pp
Memory is conceptually broken into equal-sized chunks, where the chunk size is
a power of two that is greater than the page size.
Chunks are always aligned to multiples of the chunk size.
@@ -406,12 +393,16 @@
.Pp
Small objects are managed in groups by page runs.
Each run maintains a bitmap that tracks which regions are in use.
-Allocation requests that are no more than half the quantum (8 or 16, depending
-on architecture) are rounded up to the nearest power of two.
-Allocation requests that are more than half the quantum, but no more than the
-minimum cacheline-multiple size class (see the
+@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
+@roff_tiny@depending on architecture) are rounded up to the nearest power of
+@roff_tiny@two.
+Allocation requests that are
+@roff_tiny@more than half the quantum, but
+no more than the minimum cacheline-multiple size class (see the
.Dq Q
-option) are rounded up to the nearest multiple of the quantum.
+option) are rounded up to the nearest multiple of the
+@roff_tiny@quantum.
+@roff_no_tiny@quantum (8 or 16, depending on architecture).
Allocation requests that are more than the minumum cacheline-multiple size
class, but no more than the minimum subpage-multiple size class (see the
.Dq C
@@ -440,26 +431,26 @@
It is probably also a good idea to recompile the program with suitable
options and symbols for debugger support.
.Pp
-If the program starts to give unusual results, coredump or generally behave
-differently without emitting any of the messages mentioned in the next
-section, it is likely because it depends on the storage being filled with
-zero bytes.
-Try running it with the
-.Dq Z
-option set;
-if that improves the situation, this diagnosis has been confirmed.
-If the program still misbehaves,
-the likely problem is accessing memory outside the allocated area.
-.Pp
-Alternatively, if the symptoms are not easy to reproduce, setting the
-.Dq J
-option may help provoke the problem.
-.Pp
-In truly difficult cases, the
-.Dq U
-option, if supported by the kernel, can provide a detailed trace of
-all calls made to these functions.
-.Pp
+@roff_fill@If the program starts to give unusual results, coredump or generally
+@roff_fill@behave differently without emitting any of the messages mentioned in
+@roff_fill@the next section, it is likely because it depends on the storage
+@roff_fill@being filled with zero bytes.
+@roff_fill@Try running it with the
+@roff_fill@.Dq Z
+@roff_fill@option set;
+@roff_fill@if that improves the situation, this diagnosis has been confirmed.
+@roff_fill@If the program still misbehaves,
+@roff_fill@the likely problem is accessing memory outside the allocated area.
+@roff_fill@.Pp
+@roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the
+@roff_fill@.Dq J
+@roff_fill@option may help provoke the problem.
+@roff_fill@.Pp
+@roff_stats@In truly difficult cases, the
+@roff_stats@.Dq U
+@roff_stats@option can provide a detailed trace of all calls made to these
+@roff_stats@functions.
+@roff_stats@.Pp
Unfortunately this implementation does not provide much detail about
the problems it detects; the performance impact for storing such information
would be prohibitive.
@@ -476,7 +467,7 @@
option is set, all warnings are treated as errors.
.Pp
The
-.Va _malloc_message
+.Va jemalloc_message
variable allows the programmer to override the function which emits
the text strings forming the errors and warnings if for some reason
the
@@ -486,7 +477,7 @@
this function is likely to result in a crash or deadlock.
.Pp
All messages are prefixed by
-.Dq Ao Ar progname Ac Ns Li : (malloc) .
+.Dq <jemalloc>: .
.Sh RETURN VALUES
The
.Fn malloc
@@ -564,15 +555,12 @@
jemalloc_options = "X";
.Ed
.Sh SEE ALSO
-.Xr limits 1 ,
.Xr madvise 2 ,
.Xr mmap 2 ,
.Xr sbrk 2 ,
.Xr alloca 3 ,
.Xr atexit 3 ,
-.Xr getpagesize 3 ,
-.Xr memory 3 ,
-.Xr posix_memalign 3
+.Xr getpagesize 3
.Sh STANDARDS
The
.Fn malloc ,
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 71b09c4..65ce18e 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1178,8 +1178,8 @@
static unsigned malloc_ncpus(void);
static bool malloc_init_hard(void);
static void thread_cleanup(void *arg);
-void jemalloc_prefork(void);
-void jemalloc_postfork(void);
+static void jemalloc_prefork(void);
+static void jemalloc_postfork(void);
/*
* End function prototypes.
@@ -1231,9 +1231,10 @@
# define assert(e) do { \
if (!(e)) { \
char line_buf[UMAX2S_BUFSIZE]; \
- jemalloc_message(__FILE__, ":", umax2s(__LINE__, \
- line_buf), ": Failed assertion: "); \
- jemalloc_message("\"", #e, "\"\n", ""); \
+ jemalloc_message("<jemalloc>: ", __FILE__, ":", \
+ umax2s(__LINE__, line_buf)); \
+ jemalloc_message(": Failed assertion: ", "\"", #e, \
+ "\"\n"); \
abort(); \
} \
} while (0)
@@ -1250,15 +1251,17 @@
assert(len == sizeof(malloc_utrace_t));
if (ut->p == NULL && ut->s == 0 && ut->r == NULL)
- malloc_printf("%d x USER malloc_init()\n", getpid());
+ malloc_printf("<jemalloc>:utrace: %d malloc_init()\n",
+ getpid());
else if (ut->p == NULL && ut->r != NULL) {
- malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut->r,
- ut->s);
+ malloc_printf("<jemalloc>:utrace: %d %p = malloc(%zu)\n",
+ getpid(), ut->r, ut->s);
} else if (ut->p != NULL && ut->r != NULL) {
- malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(),
- ut->r, ut->p, ut->s);
+ malloc_printf("<jemalloc>:utrace: %d %p = realloc(%p, %zu)\n",
+ getpid(), ut->r, ut->p, ut->s);
} else
- malloc_printf("%d x USER free(%p)\n", getpid(), ut->p);
+ malloc_printf("<jemalloc>:utrace: %d free(%p)\n", getpid(),
+ ut->p);
return (0);
}
@@ -2247,11 +2250,6 @@
* introduces a bootstrapping issue.
*/
#ifndef NO_TLS
- if (isthreaded == false) {
- /* Avoid the overhead of TLS for single-threaded operation. */
- return (arenas[0]);
- }
-
ret = arenas_map;
if (ret == NULL) {
ret = choose_arena_hard();
@@ -3405,11 +3403,9 @@
}
static inline void *
-arena_malloc(arena_t *arena, size_t size, bool zero)
+arena_malloc(size_t size, bool zero)
{
- assert(arena != NULL);
- assert(arena->magic == ARENA_MAGIC);
assert(size != 0);
assert(QUANTUM_CEILING(size) <= arena_maxclass);
@@ -3418,7 +3414,7 @@
if (opt_mag) {
mag_rack_t *rack = mag_rack;
if (rack == NULL) {
- rack = mag_rack_create(arena);
+ rack = mag_rack_create(choose_arena());
if (rack == NULL)
return (NULL);
mag_rack = rack;
@@ -3427,9 +3423,9 @@
return (mag_rack_alloc(rack, size, zero));
} else
#endif
- return (arena_malloc_small(arena, size, zero));
+ return (arena_malloc_small(choose_arena(), size, zero));
} else
- return (arena_malloc_large(arena, size, zero));
+ return (arena_malloc_large(choose_arena(), size, zero));
}
static inline void *
@@ -3439,7 +3435,7 @@
assert(size != 0);
if (size <= arena_maxclass)
- return (arena_malloc(choose_arena(), size, false));
+ return (arena_malloc(size, false));
else
return (huge_malloc(size, false));
}
@@ -3449,7 +3445,7 @@
{
if (size <= arena_maxclass)
- return (arena_malloc(choose_arena(), size, true));
+ return (arena_malloc(size, true));
else
return (huge_malloc(size, true));
}
@@ -3553,7 +3549,7 @@
if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
&& ceil_size <= arena_maxclass))
- ret = arena_malloc(choose_arena(), ceil_size, false);
+ ret = arena_malloc(ceil_size, false);
else {
size_t run_size;
@@ -4113,7 +4109,7 @@
* need to move the object. In that case, fall back to allocating new
* space and copying.
*/
- ret = arena_malloc(choose_arena(), size, false);
+ ret = arena_malloc(size, false);
if (ret == NULL)
return (NULL);
@@ -5725,7 +5721,7 @@
* is threaded here.
*/
-void
+static void
jemalloc_prefork(void)
{
bool again;
@@ -5773,7 +5769,7 @@
#endif
}
-void
+static void
jemalloc_postfork(void)
{
unsigned i;
diff --git a/jemalloc/src/jemalloc.h b/jemalloc/src/jemalloc.h
index dbff468..21b8de5 100644
--- a/jemalloc/src/jemalloc.h
+++ b/jemalloc/src/jemalloc.h
@@ -28,10 +28,24 @@
*******************************************************************************
*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef JEMALLOC_H_
+#define JEMALLOC_H_
+
+#include "jemalloc_defs.h"
+
+size_t malloc_usable_size(const void *ptr);
+
extern const char *jemalloc_options;
extern void (*jemalloc_message)(const char *p1, const char *p2,
const char *p3, const char *p4);
-void jemalloc_thread_cleanup(void);
-void jemalloc_prefork(void);
-void jemalloc_postfork(void);
+#endif /* JEMALLOC_H_ */
+
+#ifdef __cplusplus
+};
+#endif
+
diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in
index 6ca6018..eae3d0a 100644
--- a/jemalloc/src/jemalloc_defs.h.in
+++ b/jemalloc/src/jemalloc_defs.h.in
@@ -28,6 +28,14 @@
*******************************************************************************
*/
+#ifndef JEMALLOC_DEFS_H_
+#define JEMALLOC_DEFS_H_
+
+/*
+ * jemalloc version string.
+ */
+#undef JEMALLOC_VERSION
+
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
@@ -92,3 +100,5 @@
/* sizeof(void *) == 2^SIZEOF_PTR_2POW. */
#undef SIZEOF_PTR_2POW
+
+#endif /* JEMALLOC_DEFS_H_ */