various changes in preparation for dynamic linking support

prefer using visibility=hidden for __libc internal data, rather than
an accessor function, if the compiler supports symbol visibility.

optimize with -O3 for PIC targets (shared library). without heavy
inlining, reloading the GOT register in small functions kills
performance. 20-30% size increase for a single libc.so is not a big
deal, compared to a comparable size increase in every static binary.

use -Bsymbolic-functions, not -Bsymbolic. global variables are subject
to COPY relocations, and thus binding their addresses in the library
at link time will cause library functions to read the wrong (original)
copies instead of the copies made in the main program's bss section.

add entry point, _start, for dynamic linker.
diff --git a/Makefile b/Makefile
index a47439b..5c17642 100644
--- a/Makefile
+++ b/Makefile
@@ -21,16 +21,16 @@
 GENH = include/bits/alltypes.h
 
 CFLAGS  = -Os -nostdinc -ffreestanding -std=c99 -D_XOPEN_SOURCE=700 -pipe
-LDFLAGS = -nostdlib -shared -Wl,-Bsymbolic
+LDFLAGS = -nostdlib -shared -fPIC -Wl,-e,_start -Wl,-Bsymbolic-functions
 INC     = -I./include -I./src/internal -I./arch/$(ARCH)
-PIC     = -fPIC
+PIC     = -fPIC -O3
 AR      = $(CROSS_COMPILE)ar
 RANLIB  = $(CROSS_COMPILE)ranlib
 OBJCOPY = $(CROSS_COMPILE)objcopy
 
 ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH))
 
-EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv
+EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl
 EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a)
 CRT_LIBS = lib/crt1.o lib/crti.o lib/crtn.o
 LIBC_LIBS = lib/libc.a