external/oprofile 0.9.1
diff --git a/doc/Makefile.am b/doc/Makefile.am
new file mode 100644
index 0000000..be76bac
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,79 @@
+RM=rm
+MANDIR=$(DESTDIR)@mandir@/man1
+# xsltproc settings: flags are substituted by configure; stylesheets are taken from $(srcdir)/xsl.
+XSLTPROC=xsltproc
+XSLTPROC_FLAGS=@XSLTPROC_FLAGS@
+XHTML_STYLESHEET=$(srcdir)/xsl/xhtml.xsl
+CHUNK_XHTML_STYLESHEET=$(srcdir)/xsl/xhtml-chunk.xsl
+XML_CATALOG_FILES=xsl/catalog.xml
+STYLESHEETS=$(CHUNK_XHTML_STYLESHEET) $(srcdir)/xsl/xhtml-common.xsl
+
+man_MANS = \
+	oprofile.1 \
+	opcontrol.1 \
+	opreport.1 \
+	opannotate.1 \
+	opgprof.1 \
+	ophelp.1 \
+	oparchive.1
+
+htmldir = $(prefix)/share/doc/oprofile
+dist_html_DATA = oprofile.html internals.html
+
+if have_xsltproc
+# Single-page XHTML manuals: run xsltproc with $(XHTML_STYLESHEET), passing the catalog path through the environment.
+oprofile.html: ${top_srcdir}/doc/oprofile.xml
+	XML_CATALOG_FILES=$(XML_CATALOG_FILES) $(XSLTPROC) $(XSLTPROC_FLAGS) -o $@ --stringparam version @VERSION@ $(XHTML_STYLESHEET) $<
+
+internals.html: ${top_srcdir}/doc/internals.xml
+	XML_CATALOG_FILES=$(XML_CATALOG_FILES) $(XSLTPROC) $(XSLTPROC_FLAGS) -o $@ --stringparam version @VERSION@ $(XHTML_STYLESHEET) $<
+
+# rules to generate oprofile.sf.net/doc files
+# Chunked user manual written into doc/; $(mkdir_p) succeeds when doc/ already exists and still reports real errors.
+doc/index.html: ${top_srcdir}/doc/oprofile.xml
+	$(mkdir_p) doc/
+	$(XSLTPROC) -o doc/ $(XSLTPROC_FLAGS) --stringparam version @VERSION@ $(CHUNK_XHTML_STYLESHEET) $<
+# Chunked internals manual; $(mkdir_p) also creates the parent doc/ so this target can be built on its own.
+doc/internals/index.html: ${top_srcdir}/doc/internals.xml
+	$(mkdir_p) doc/internals/
+	$(XSLTPROC) -o doc/internals/ $(XSLTPROC_FLAGS) --stringparam version @VERSION@ $(CHUNK_XHTML_STYLESHEET) $<
+
+chunk: doc/index.html doc/internals/index.html
+	cp ${top_srcdir}/doc/buffers.png doc/internals/
+
+else
+# Without xsltproc nothing is regenerated: the HTML targets are only touched, and chunk does nothing.
+oprofile.html:
+	touch $@
+
+internals.html:
+	touch $@
+
+chunk:
+
+endif
+# Remove the XML catalogs under xsl/ (catalog-1.xml is presumably generated from xsl/catalog-1.xml.in; confirm in configure).
+distclean-local:
+	$(RM) -f xsl/catalog-1.xml xsl/catalog.xml
+# NOTE(review): generated_mans is not assigned in the visible part of this file; if unset this runs "rm -f" with no operands.
+clean-local:
+	$(RM) -f $(generated_mans)
+
+# Remove stale cat1 pages that "make uninstall" leaves behind; honours DESTDIR. The loop is a no-op while LINK_LIST is unset.
+uninstall-local:
+	rm -f "$(DESTDIR)$(mandir)/cat1/oprofile.1.gz"
+	@list='$(LINK_LIST)'; for f in $$list; do	\
+		rm -f "$(DESTDIR)$(mandir)/cat1/$$f.gz";	\
+	done
+
+EXTRA_DIST = \
+	oprofile.1 \
+	oprofile.1.in \
+	oprofile.xml \
+	internals.xml \
+	xsl/catalog-1.xml.in \
+	xsl/xhtml.xsl \
+	xsl/xhtml-common.xsl \
+	xsl/xhtml-chunk.xsl \
+	srcdoc/Doxyfile.in \
+	srcdoc/Makefile
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 0000000..e44f3ff
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,506 @@
+# Makefile.in generated by automake 1.9.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = doc
+DIST_COMMON = $(dist_html_DATA) $(srcdir)/Makefile.am \
+	$(srcdir)/Makefile.in $(srcdir)/opannotate.1.in \
+	$(srcdir)/oparchive.1.in $(srcdir)/opcontrol.1.in \
+	$(srcdir)/opgprof.1.in $(srcdir)/ophelp.1.in \
+	$(srcdir)/opreport.1.in $(srcdir)/oprofile.1.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/binutils.m4 \
+	$(top_srcdir)/m4/builtinexpect.m4 \
+	$(top_srcdir)/m4/compileroption.m4 \
+	$(top_srcdir)/m4/configmodule.m4 \
+	$(top_srcdir)/m4/copyifchange.m4 $(top_srcdir)/m4/docbook.m4 \
+	$(top_srcdir)/m4/extradirs.m4 $(top_srcdir)/m4/findkernel.m4 \
+	$(top_srcdir)/m4/kerneloption.m4 \
+	$(top_srcdir)/m4/kernelversion.m4 \
+	$(top_srcdir)/m4/mallocattribute.m4 \
+	$(top_srcdir)/m4/poptconst.m4 \
+	$(top_srcdir)/m4/precompiledheader.m4 $(top_srcdir)/m4/qt.m4 \
+	$(top_srcdir)/m4/resultyn.m4 $(top_srcdir)/m4/sstream.m4 \
+	$(top_srcdir)/m4/typedef.m4 $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = oprofile.1 opcontrol.1 ophelp.1 opreport.1 \
+	opannotate.1 opgprof.1 oparchive.1
+SOURCES =
+DIST_SOURCES =
+man1dir = $(mandir)/man1
+am__installdirs = "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(htmldir)"
+NROFF = nroff
+MANS = $(man_MANS)
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
+dist_htmlDATA_INSTALL = $(INSTALL_DATA)
+DATA = $(dist_html_DATA)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BFD_LIBS = @BFD_LIBS@
+CAT_ENTRY_END = @CAT_ENTRY_END@
+CAT_ENTRY_START = @CAT_ENTRY_START@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATE = @DATE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DOCBOOK_ROOT = @DOCBOOK_ROOT@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXTRA_CFLAGS_MODULE = @EXTRA_CFLAGS_MODULE@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KINC = @KINC@
+KSRC = @KSRC@
+KVERS = @KVERS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBERTY_LIBS = @LIBERTY_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MOC = @MOC@
+MODINSTALLDIR = @MODINSTALLDIR@
+OBJEXT = @OBJEXT@
+OPROFILE_DIR = @OPROFILE_DIR@
+OPROFILE_MODULE_ARCH = @OPROFILE_MODULE_ARCH@
+OP_CFLAGS = @OP_CFLAGS@
+OP_CXXFLAGS = @OP_CXXFLAGS@
+OP_DOCDIR = @OP_DOCDIR@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POPT_LIBS = @POPT_LIBS@
+PTRDIFF_T_TYPE = @PTRDIFF_T_TYPE@
+QT_INCLUDES = @QT_INCLUDES@
+QT_LDFLAGS = @QT_LDFLAGS@
+QT_LIB = @QT_LIB@
+QT_VERSION = @QT_VERSION@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIZE_T_TYPE = @SIZE_T_TYPE@
+STRIP = @STRIP@
+UIC = @UIC@
+VERSION = @VERSION@
+XML_CATALOG = @XML_CATALOG@
+XSLTPROC = xsltproc
+XSLTPROC_FLAGS = @XSLTPROC_FLAGS@
+X_CFLAGS = @X_CFLAGS@
+X_EXTRA_LIBS = @X_EXTRA_LIBS@
+X_LIBS = @X_LIBS@
+X_PRE_LIBS = @X_PRE_LIBS@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_RANLIB = @ac_ct_RANLIB@
+ac_ct_STRIP = @ac_ct_STRIP@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
+am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+datadir = @datadir@
+enable_abi_FALSE = @enable_abi_FALSE@
+enable_abi_TRUE = @enable_abi_TRUE@
+exec_prefix = @exec_prefix@
+have_qt_FALSE = @have_qt_FALSE@
+have_qt_TRUE = @have_qt_TRUE@
+have_xsltproc_FALSE = @have_xsltproc_FALSE@
+have_xsltproc_TRUE = @have_xsltproc_TRUE@
+host_alias = @host_alias@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+kernel_support_FALSE = @kernel_support_FALSE@
+kernel_support_TRUE = @kernel_support_TRUE@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+topdir = @topdir@
+RM = rm
+MANDIR = $(DESTDIR)@mandir@/man1
+XHTML_STYLESHEET = $(srcdir)/xsl/xhtml.xsl
+CHUNK_XHTML_STYLESHEET = $(srcdir)/xsl/xhtml-chunk.xsl
+XML_CATALOG_FILES = xsl/catalog.xml
+STYLESHEETS = $(CHUNK_XHTML_STYLESHEET) $(srcdir)/xsl/xhtml-common.xsl
+man_MANS = \
+	oprofile.1 \
+	opcontrol.1 \
+	opreport.1 \
+	opannotate.1 \
+	opgprof.1 \
+	ophelp.1 \
+	oparchive.1
+
+htmldir = $(prefix)/share/doc/oprofile
+dist_html_DATA = oprofile.html internals.html
+EXTRA_DIST = \
+	oprofile.1 \
+	oprofile.1.in \
+	oprofile.xml \
+	internals.xml \
+	xsl/catalog-1.xml.in \
+	xsl/xhtml.xsl \
+	xsl/xhtml-common.xsl \
+	xsl/xhtml-chunk.xsl \
+	srcdoc/Doxyfile.in \
+	srcdoc/Makefile
+
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign  doc/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign  doc/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+oprofile.1: $(top_builddir)/config.status $(srcdir)/oprofile.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+opcontrol.1: $(top_builddir)/config.status $(srcdir)/opcontrol.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+ophelp.1: $(top_builddir)/config.status $(srcdir)/ophelp.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+opreport.1: $(top_builddir)/config.status $(srcdir)/opreport.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+opannotate.1: $(top_builddir)/config.status $(srcdir)/opannotate.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+opgprof.1: $(top_builddir)/config.status $(srcdir)/opgprof.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+oparchive.1: $(top_builddir)/config.status $(srcdir)/oparchive.1.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+uninstall-info-am:
+install-man1: $(man1_MANS) $(man_MANS)
+	@$(NORMAL_INSTALL)
+	test -z "$(man1dir)" || $(mkdir_p) "$(DESTDIR)$(man1dir)"
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \
+	  else file=$$i; fi; \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+uninstall-man1:
+	@$(NORMAL_UNINSTALL)
+	@list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \
+	l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \
+	for i in $$l2; do \
+	  case "$$i" in \
+	    *.1*) list="$$list $$i" ;; \
+	  esac; \
+	done; \
+	for i in $$list; do \
+	  ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+	  case "$$ext" in \
+	    1*) ;; \
+	    *) ext='1' ;; \
+	  esac; \
+	  inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \
+	  inst=`echo $$inst | sed -e 's/^.*\///'`; \
+	  inst=`echo $$inst | sed '$(transform)'`.$$ext; \
+	  echo " rm -f '$(DESTDIR)$(man1dir)/$$inst'"; \
+	  rm -f "$(DESTDIR)$(man1dir)/$$inst"; \
+	done
+install-dist_htmlDATA: $(dist_html_DATA)
+	@$(NORMAL_INSTALL)
+	test -z "$(htmldir)" || $(mkdir_p) "$(DESTDIR)$(htmldir)"
+	@list='$(dist_html_DATA)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  f=$(am__strip_dir) \
+	  echo " $(dist_htmlDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(htmldir)/$$f'"; \
+	  $(dist_htmlDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(htmldir)/$$f"; \
+	done
+
+uninstall-dist_htmlDATA:
+	@$(NORMAL_UNINSTALL)
+	@list='$(dist_html_DATA)'; for p in $$list; do \
+	  f=$(am__strip_dir) \
+	  echo " rm -f '$(DESTDIR)$(htmldir)/$$f'"; \
+	  rm -f "$(DESTDIR)$(htmldir)/$$f"; \
+	done
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+	$(mkdir_p) $(distdir)/srcdoc $(distdir)/xsl
+	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+	list='$(DISTFILES)'; for file in $$list; do \
+	  case $$file in \
+	    $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+	    $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+	  esac; \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkdir_p) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(MANS) $(DATA)
+installdirs:
+	for dir in "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(htmldir)"; do \
+	  test -z "$$dir" || $(mkdir_p) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-local mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-local
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am: install-dist_htmlDATA install-man
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man: install-man1
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-dist_htmlDATA uninstall-info-am \
+	uninstall-local uninstall-man
+
+uninstall-man: uninstall-man1
+
+.PHONY: all all-am check check-am clean clean-generic clean-local \
+	distclean distclean-generic distclean-local distdir dvi dvi-am \
+	html html-am info info-am install install-am install-data \
+	install-data-am install-dist_htmlDATA install-exec \
+	install-exec-am install-info install-info-am install-man \
+	install-man1 install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic pdf pdf-am ps ps-am uninstall \
+	uninstall-am uninstall-dist_htmlDATA uninstall-info-am \
+	uninstall-local uninstall-man uninstall-man1
+
+
+@have_xsltproc_TRUE@oprofile.html: ${top_srcdir}/doc/oprofile.xml
+@have_xsltproc_TRUE@	XML_CATALOG_FILES=$(XML_CATALOG_FILES) $(XSLTPROC) $(XSLTPROC_FLAGS) -o $@ --stringparam version @VERSION@ $(XHTML_STYLESHEET) $<
+
+@have_xsltproc_TRUE@internals.html: ${top_srcdir}/doc/internals.xml
+@have_xsltproc_TRUE@	XML_CATALOG_FILES=$(XML_CATALOG_FILES) $(XSLTPROC) $(XSLTPROC_FLAGS) -o $@ --stringparam version @VERSION@ $(XHTML_STYLESHEET) $<
+
+# rules to generate oprofile.sf.net/doc files
+# Chunked user manual written into doc/; $(mkdir_p) succeeds when doc/ already exists and still reports real errors.
+@have_xsltproc_TRUE@doc/index.html: ${top_srcdir}/doc/oprofile.xml
+@have_xsltproc_TRUE@	$(mkdir_p) doc/
+@have_xsltproc_TRUE@	$(XSLTPROC) -o doc/ $(XSLTPROC_FLAGS) --stringparam version @VERSION@ $(CHUNK_XHTML_STYLESHEET) $<
+# Chunked internals manual; $(mkdir_p) also creates the parent doc/ so this target can be built on its own.
+@have_xsltproc_TRUE@doc/internals/index.html: ${top_srcdir}/doc/internals.xml
+@have_xsltproc_TRUE@	$(mkdir_p) doc/internals/
+@have_xsltproc_TRUE@	$(XSLTPROC) -o doc/internals/ $(XSLTPROC_FLAGS) --stringparam version @VERSION@ $(CHUNK_XHTML_STYLESHEET) $<
+
+@have_xsltproc_TRUE@chunk: doc/index.html doc/internals/index.html
+@have_xsltproc_TRUE@	cp ${top_srcdir}/doc/buffers.png doc/internals/
+
+@have_xsltproc_FALSE@oprofile.html:
+@have_xsltproc_FALSE@	touch $@
+
+@have_xsltproc_FALSE@internals.html:
+@have_xsltproc_FALSE@	touch $@
+
+@have_xsltproc_FALSE@chunk:
+
+distclean-local:
+	$(RM) -f xsl/catalog-1.xml xsl/catalog.xml
+
+clean-local:
+	$(RM) -f $(generated_mans)
+
+# Remove stale cat1 pages that "make uninstall" leaves behind; honours DESTDIR. The loop is a no-op while LINK_LIST is unset.
+uninstall-local:
+	rm -f "$(DESTDIR)$(mandir)/cat1/oprofile.1.gz"
+	@list='$(LINK_LIST)'; for f in $$list; do	\
+		rm -f "$(DESTDIR)$(mandir)/cat1/$$f.gz";	\
+	done
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/internals.html b/doc/internals.html
new file mode 100644
index 0000000..2305168
--- /dev/null
+++ b/doc/internals.html
@@ -0,0 +1,1616 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+    <title>OProfile Internals</title>
+    <meta name="generator" content="DocBook XSL Stylesheets V1.68.1" />
+  </head>
+  <body>
+    <div class="book" lang="en" xml:lang="en">
+      <div class="titlepage">
+        <div>
+          <div>
+            <h1 class="title"><a id="oprofile-internals"></a>OProfile Internals</h1>
+          </div>
+          <div>
+            <div class="authorgroup">
+              <div class="author">
+                <h3 class="author"><span class="firstname">John</span> <span class="surname">Levon</span></h3>
+                <div class="affiliation">
+                  <div class="address">
+                    <p>
+                      <code class="email">&lt;<a href="mailto:levon@movementarian.org">levon@movementarian.org</a>&gt;</code>
+                    </p>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+          <div>
+            <p class="copyright">Copyright © 2003 John Levon</p>
+          </div>
+        </div>
+        <hr />
+      </div>
+      <div class="toc">
+        <p>
+          <b>Table of Contents</b>
+        </p>
+        <dl>
+          <dt>
+            <span class="chapter">
+              <a href="#introduction">1. Introduction</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#overview">1. Overview</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#components">2. Components of the OProfile system</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#arch-specific-components">2.1. Architecture-specific components</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#filesystem">2.2. oprofilefs</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#driver">2.3. Generic kernel driver</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#daemon">2.4. The OProfile daemon</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#post-profiling">2.5. Post-profiling tools</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#performance-counters">2. Performance counter management</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#performance-counters-ui">1. Providing a user interface</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#performance-counters-programming">2. Programming the performance counter registers</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#performance-counters-start">2.1. Starting and stopping the counters</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#id2495021">2.2. IA64 and perfmon</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#collecting-samples">3. Collecting and processing samples</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#receiving-interrupts">1. Receiving interrupts</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#core-structure">2. Core data structures</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#logging-sample">3. Logging a sample</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#logging-stack">4. Logging stack traces</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#synchronising-buffers">5. Synchronising the CPU buffers to the event buffer</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#dentry-cookies">6. Identifying binary images</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#finding-dentry">7. Finding a sample's binary image and offset</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#sample-files">4. Generating sample files</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#processing-buffer">1. Processing the buffer</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#handling-kernel-samples">1.1. Handling kernel samples</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#sample-file-generation">2. Locating and creating sample files</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#sample-file-writing">3. Writing data to a sample file</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#output">5. Generating useful output</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#profile-specification">1. Handling the profile specification</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#sample-file-collating">2. Collating the candidate sample files</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#sample-file-classifying">2.1. Classifying sample files</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#sample-file-inverting">2.2. Creating inverted profile lists</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#generating-profile-data">3. Generating profile data</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#bfd">3.1. Processing the binary image</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#processing-sample-files">3.2. Processing the sample files</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#generating-output">4. Generating output</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="glossary">
+              <a href="#glossary">Glossary of OProfile source concepts and types</a>
+            </span>
+          </dt>
+        </dl>
+      </div>
+      <div class="list-of-figures">
+        <p>
+          <b>List of Figures</b>
+        </p>
+        <dl>
+          <dt>3.1. <a href="#id2495193">The OProfile buffers</a></dt>
+        </dl>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="introduction"></a>Chapter 1. Introduction</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#overview">1. Overview</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#components">2. Components of the OProfile system</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#arch-specific-components">2.1. Architecture-specific components</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#filesystem">2.2. oprofilefs</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#driver">2.3. Generic kernel driver</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#daemon">2.4. The OProfile daemon</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#post-profiling">2.5. Post-profiling tools</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+          </dl>
+        </div>
+        <p>
+This document is current for OProfile version 0.9.1cvs.
+This document provides some details on the internal workings of OProfile for the
+interested hacker. This document assumes strong C, working C++, plus some knowledge of
+kernel internals and CPU hardware.
+</p>
+        <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+          <h3 class="title">Note</h3>
+          <p>
+Only the "new" implementation associated with kernel 2.6 and above is covered here. 2.4
+uses a very different kernel module implementation and daemon to produce the sample files.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="overview"></a>1. Overview</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+OProfile is a statistical continuous profiler. In other words, profiles are generated by
+regularly sampling the current registers on each CPU (from an interrupt handler, the
+saved PC value at the time of interrupt is stored), and converting that runtime PC
+value into something meaningful to the programmer.
+</p>
+          <p>
+OProfile achieves this by taking the stream of sampled PC values, along with the detail
+of which task was running at the time of the interrupt, and converting each value into a file offset
+against a particular binary file. Because applications <code class="function">mmap()</code>
+the code they run (be it <code class="filename">/bin/bash</code>, <code class="filename">/lib/libfoo.so</code>
+or whatever), it's possible to find the relevant binary file and offset by walking
+the task's list of mapped memory areas. Each PC value is thus converted into a tuple
+of binary-image,offset. This is something that the userspace tools can use directly
+to reconstruct where the code came from, including the particular assembly instructions,
+symbol, and source line (via the binary's debug information if present).
+</p>
+          <p>
+Regularly sampling the PC value like this approximates what actually was executed and
+how often - more often than not, this statistical approximation is good enough to
+reflect reality. In common operation, the time between each sample interrupt is regulated
+by a fixed number of clock cycles. This implies that the results will reflect where
+the CPU is spending the most time; this is obviously a very useful information source
+for performance analysis.
+</p>
+          <p>
+Sometimes though, an application programmer needs different kinds of information: for example,
+"which of the source routines cause the most cache misses?". The rise in importance of
+such metrics in recent years has led many CPU manufacturers to provide hardware performance
+counters capable of measuring these events on the hardware level. Typically, these counters
+increment once per event, and generate an interrupt on reaching some pre-defined
+number of events. OProfile can use these interrupts to generate samples: then, the
+profile results are a statistical approximation of which code caused how many of the
+given event.
+</p>
+          <p>
+Consider a simplified system that only executes two functions A and B. A
+takes one cycle to execute, whereas B takes 99 cycles. Imagine we run at
+100 cycles a second, and we've set the performance counter to create an
+interrupt after a set number of "events" (in this case an event is one
+clock cycle). It should be clear that the chance of the interrupt
+occurring in function A is 1/100, and 99/100 for function B. Thus, we
+statistically approximate the actual relative performance features of
+the two functions over time. This same analysis works for other types of
+events, providing that the interrupt is tied to the number of events
+occurring (that is, after N events, an interrupt is generated).
+</p>
+          <p>
+There is typically more than one of these counters, so it's possible to set up profiling
+for several different event types. Using these counters gives us a powerful, low-overhead
+way of gaining performance metrics. If OProfile, or the CPU, does not support performance
+counters, then a simpler method is used: the kernel timer interrupt feeds samples
+into OProfile itself.
+</p>
+          <p>
+The rest of this document concerns itself with how we get from receiving samples at
+interrupt time to producing user-readable profile information.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="components"></a>2. Components of the OProfile system</h2>
+              </div>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="arch-specific-components"></a>2.1. Architecture-specific components</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+If OProfile supports the hardware performance counters found on
+a particular architecture, code for managing the details of setting
+up and managing these counters can be found in the kernel source
+tree in the relevant <code class="filename">arch/<span class="emphasis"><em>arch</em></span>/oprofile/</code>
+directory. The architecture-specific implementation works via
+filling in the oprofile_operations structure at init time. This
+provides a set of operations such as <code class="function">setup()</code>,
+<code class="function">start()</code>, <code class="function">stop()</code>, etc.
+that manage the hardware-specific details of fiddling with the
+performance counter registers.
+</p>
+            <p>
+The other important facility available to the architecture code is
+<code class="function">oprofile_add_sample()</code>.  This is where a particular sample
+taken at interrupt time is fed into the generic OProfile driver code.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="filesystem"></a>2.2. oprofilefs</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+OProfile implements a pseudo-filesystem known as "oprofilefs", mounted from
+userspace at <code class="filename">/dev/oprofile</code>. This consists of small
+files for reporting and receiving configuration from userspace, as well
+as the actual character device that the OProfile userspace receives samples
+from. At <code class="function">setup()</code> time, the architecture-specific code may
+add further configuration files related to the details of the performance
+counters. For example, on x86, one numbered directory for each hardware
+performance counter is added, with files in each for the event type,
+reset value, etc.
+</p>
+            <p>
+The filesystem also contains a <code class="filename">stats</code> directory with
+a number of useful counters for various OProfile events.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="driver"></a>2.3. Generic kernel driver</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+This lives in <code class="filename">drivers/oprofile/</code>, and forms the core of
+how OProfile works in the kernel. Its job is to take samples delivered
+from the architecture-specific code (via <code class="function">oprofile_add_sample()</code>),
+and buffer this data, in a transformed form as described later, until releasing
+the data to the userspace daemon via the <code class="filename">/dev/oprofile/buffer</code>
+character device.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="daemon"></a>2.4. The OProfile daemon</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The OProfile userspace daemon's job is to take the raw data provided by the
+kernel and write it to the disk. It takes the single data stream from the
+kernel and logs sample data against a number of sample files (found in
+<code class="filename">/var/lib/oprofile/samples/current/</code>). For the benefit
+of the "separate" functionality, the names/paths of these sample files
+are mangled to reflect where the samples were from: this can include
+thread IDs, the binary file path, the event type used, and more.
+</p>
+            <p>
+After this final step from interrupt to disk file, the data is now
+persistent (that is, changes in the running of the system do not invalidate
+stored data). So the post-profiling tools can run on this data at any
+time (assuming the original binary files are still available and unchanged,
+naturally).
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en"><div class="titlepage"><div><div><h3 class="title"><a id="post-profiling"></a>2.5. Post-profiling tools</h3></div></div></div>
+So far, we've collected data, but we've yet to present it in a useful form
+to the user. This is the job of the post-profiling tools. In general form,
+they collate a subset of the available sample files, load and process each one
+correlated against the relevant binary file, and finally produce user-readable
+information.
+</div>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="performance-counters"></a>Chapter 2. Performance counter management</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#performance-counters-ui">1. Providing a user interface</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#performance-counters-programming">2. Programming the performance counter registers</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#performance-counters-start">2.1. Starting and stopping the counters</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#id2495021">2.2. IA64 and perfmon</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+          </dl>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="performance-counters-ui"></a>1. Providing a user interface</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+The performance counter registers need programming in order to set the
+type of event to count, etc. OProfile uses a standard model across all
+CPUs for defining these events as follows:
+</p>
+          <div class="informaltable">
+            <table border="1">
+              <colgroup>
+                <col />
+                <col />
+              </colgroup>
+              <tbody>
+                <tr>
+                  <td>
+                    <code class="option">event</code>
+                  </td>
+                  <td>The event type e.g. DATA_MEM_REFS</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">unit mask</code>
+                  </td>
+                  <td>The sub-events to count (more detailed specification)</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">counter</code>
+                  </td>
+                  <td>The hardware counter(s) that can count this event</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">count</code>
+                  </td>
+                  <td>The reset value (how many events before an interrupt)</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">kernel</code>
+                  </td>
+                  <td>Whether the counter should increment when in kernel space</td>
+                </tr>
+                <tr>
+                  <td>
+                    <code class="option">user</code>
+                  </td>
+                  <td>Whether the counter should increment when in user space</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+          <p>
+The term "unit mask" is borrowed from the Intel architectures, and can
+further specify exactly when a counter is incremented (for example,
+cache-related events can be restricted to particular state transitions
+of the cache lines).
+</p>
+          <p>
+All of the available hardware events and their details are specified in
+the textual files in the <code class="filename">events</code> directory. The
+syntax of these files should be fairly obvious. The user specifies the
+names and configuration details of the chosen counters via
+<span><strong class="command">opcontrol</strong></span>. These are then written to the kernel
+module (in numerical form) via <code class="filename">/dev/oprofile/N/</code>
+where N is the physical hardware counter (some events can only be used
+on specific counters; OProfile hides these details from the user when
+possible). On IA64, the perfmon-based interface behaves somewhat
+differently, as described later.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="performance-counters-programming"></a>2. Programming the performance counter registers</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+We have described how the user interface fills in the desired
+configuration of the counters and transmits the information to the
+kernel. It is the job of the <code class="function">-&gt;setup()</code> method
+to actually program the performance counter registers. Clearly, the
+details of how this is done are architecture-specific; they are also
+model-specific on many architectures. For example, i386 provides methods
+for each model type that program the counter registers correctly
+(see the <code class="filename">op_model_*</code> files in
+<code class="filename">arch/i386/oprofile</code> for the details). The method
+reads the values stored in the virtual oprofilefs files and programs
+the registers appropriately, ready for starting the actual profiling
+session.
+</p>
+          <p>
+The architecture-specific drivers make sure to save the old register
+settings before doing OProfile setup. They are restored when OProfile
+shuts down. This is useful, for example, on i386, where the NMI watchdog
+uses the same performance counter registers as OProfile; they cannot
+run concurrently, but OProfile makes sure to restore the setup it found
+before it was running.
+</p>
+          <p>
+In addition to programming the counter registers themselves, other setup
+is often necessary. For example, on i386, the local APIC needs
+programming in order to make the counter's overflow interrupt appear as
+an NMI (non-maskable interrupt). This allows sampling (and therefore
+profiling) of regions where "normal" interrupts are masked, enabling
+more reliable profiles.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="performance-counters-start"></a>2.1. Starting and stopping the counters</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Initiating a profiling session is done via writing an ASCII '1'
+to the file <code class="filename">/dev/oprofile/enable</code>. This sets up the
+core, and calls into the architecture-specific driver to actually
+enable each configured counter. Again, the details of how this is
+done are model-specific (for example, the Athlon models can disable
+or enable on a per-counter basis, unlike the PPro models).
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="id2495021"></a>2.2. IA64 and perfmon</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The IA64 architecture provides a different interface from the other
+architectures, using the existing perfmon driver. Register programming
+is handled entirely in user-space (see
+<code class="filename">daemon/opd_perfmon.c</code> for the details). A process
+is forked for each CPU, which creates a perfmon context and sets the
+counter registers appropriately via the
+<code class="function">sys_perfmonctl</code> interface. In addition, the actual
+initiation and termination of the profiling session is handled via the
+same interface using <code class="constant">PFM_START</code> and
+<code class="constant">PFM_STOP</code>. On IA64, then, there are no oprofilefs
+files for the performance counters, as the kernel driver does not
+program the registers itself.
+</p>
+            <p>
+Instead, the perfmon driver for OProfile simply registers with the
+OProfile core with an OProfile-specific UUID. During a profiling
+session, the perfmon core calls into the OProfile perfmon driver and
+samples are registered with the OProfile core itself as usual (with
+<code class="function">oprofile_add_sample()</code>).
+</p>
+          </div>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="collecting-samples"></a>Chapter 3. Collecting and processing samples</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#receiving-interrupts">1. Receiving interrupts</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#core-structure">2. Core data structures</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#logging-sample">3. Logging a sample</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#logging-stack">4. Logging stack traces</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#synchronising-buffers">5. Synchronising the CPU buffers to the event buffer</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#dentry-cookies">6. Identifying binary images</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#finding-dentry">7. Finding a sample's binary image and offset</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="receiving-interrupts"></a>1. Receiving interrupts</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Naturally, how the overflow interrupts are received is specific
+to the hardware architecture, unless we are in "timer" mode, where the
+logging routine is called directly from the standard kernel timer
+interrupt handler.
+</p>
+          <p>
+On the i386 architecture, the local APIC is programmed such that when a
+counter overflows (that is, it receives an event that causes an integer
+overflow of the register value to zero), an NMI is generated. This calls
+into the general handler <code class="function">do_nmi()</code>; because OProfile
+has registered itself as capable of handling NMI interrupts, this will
+call into the OProfile driver code in
+<code class="filename">arch/i386/oprofile</code>. Here, the saved PC value (the
+CPU saves the register set at the time of interrupt on the stack
+available for inspection) is extracted, and the counters are examined to
+find out which one generated the interrupt. Also determined is whether
+the system was inside kernel or user space at the time of the interrupt.
+These three pieces of information are then forwarded onto the OProfile
+core via <code class="function">oprofile_add_sample()</code>. Finally, the
+counter values are reset to the chosen count value, to ensure another
+interrupt happens after another N events have occurred. Other
+architectures behave in a similar manner.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="core-structure"></a>2. Core data structures</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Before considering what happens when we log a sample, we shall digress
+for a moment and look at the general structure of the data collection
+system.
+</p>
+          <p>
+OProfile maintains a small buffer for storing the logged samples for
+each CPU on the system. Only this buffer is altered when we actually log
+a sample (remember, we may still be in an NMI context, so no locking is
+possible). The buffer is managed by a two-handed system; the "head"
+iterator dictates where the next sample data should be placed in the
+buffer. Of course, overflow of the buffer is possible, in which case
+the sample is discarded.
+</p>
+          <p>
+It is critical to remember that at this point, the PC value is an
+absolute value, and is therefore only meaningful in the context of which
+task it was logged against. Thus, these per-CPU buffers also maintain
+details of which task each logged sample is for, as described in the
+next section. In addition, we store whether the sample was in kernel
+space or user space (on some architectures and configurations, the address
+space is not sub-divided neatly at a specific PC value, so we must store
+this information).
+</p>
+          <p>
+As well as these small per-CPU buffers, we have a considerably larger
+single buffer. This holds the data that is eventually copied out into
+the OProfile daemon. On certain system events, the per-CPU buffers are
+processed and entered (in mutated form) into the main buffer, known in
+the source as the "event buffer". The "tail" iterator indicates the
+point from which the CPU buffer may be read, up to the position of the "head"
+iterator. This provides an entirely lock-free method for extracting data
+from the CPU buffers. This process is described in detail later in this chapter.
+</p>
+          <div class="figure">
+            <a id="id2495193"></a>
+            <p class="title">
+              <b>Figure 3.1. The OProfile buffers</b>
+            </p>
+            <div>
+              <img src="buffers.png" alt="The OProfile buffers" />
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="logging-sample"></a>3. Logging a sample</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+As mentioned, the sample is logged into the buffer specific to the
+current CPU. The CPU buffer is a simple array of pairs of unsigned long
+values; for a sample, they hold the PC value and the counter for the
+sample. (The counter value is later used to translate back into the relevant
+event type the counter was programmed to).
+</p>
+          <p>
+In addition to logging the sample itself, we also log task switches.
+This is simply done by storing the address of the last task to log a
+sample on that CPU in a data structure, and writing a task switch entry
+into the buffer if the new value of <code class="function">current()</code> has
+changed. Note that later we will directly de-reference this pointer;
+this imposes certain restrictions on when and how the CPU buffers need
+to be processed.
+</p>
+          <p>
+Finally, as mentioned, we log whether we have changed between kernel and
+userspace using a similar method. Both of these variables
+(<code class="varname">last_task</code> and <code class="varname">last_is_kernel</code>) are
+reset when the CPU buffer is read.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="logging-stack"></a>4. Logging stack traces</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+OProfile can also provide statistical samples of call chains (on x86). To
+do this, at sample time, the frame pointer chain is traversed, recording
+the return address for each stack frame. This will only work if the code
+was compiled with frame pointers, but we're careful to abort the
+traversal if the frame pointer appears bad. We store the set of return
+addresses straight into the CPU buffer. Note that, since this traversal
+is keyed off the standard sample interrupt, the number of times a
+function appears in a stack trace is not an indicator of how many times
+the call site was executed: rather, it's related to the number of
+samples we took where that call site was involved. Thus, the results for
+stack traces are not necessarily proportional to the call counts:
+typical programs will have many <code class="function">main()</code> samples.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="synchronising-buffers"></a>5. Synchronising the CPU buffers to the event buffer</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+At some point, we have to process the data in each CPU buffer and enter
+it into the main (event) buffer. The file
+<code class="filename">buffer_sync.c</code> contains the relevant code. We
+periodically (currently every <code class="constant">HZ</code>/4 jiffies) start
+the synchronisation process. In addition, we process the buffers on
+certain events, such as an application calling
+<code class="function">munmap()</code>. This is particularly important for
+<code class="function">exit()</code> - because the CPU buffers contain pointers
+to the task structure, if we don't process all the buffers before the
+task is actually destroyed and the task structure freed, then we could
+end up trying to dereference a bogus pointer in one of the CPU buffers.
+</p>
+          <p>
+We also add a notification when a kernel module is loaded; this is so
+that user-space can re-read <code class="filename">/proc/modules</code> to
+determine the load addresses of kernel module text sections. Without
+this notification, samples for a newly-loaded module could get lost or
+be attributed to the wrong module.
+</p>
+          <p>
+The synchronisation itself works in the following manner: first, mutual
+exclusion on the event buffer is taken. Remember, we do not need to do
+that for each CPU buffer, as we only read from the tail iterator (whilst
+interrupts might be arriving at the same buffer, but they will write to
+the position of the head iterator, leaving previously written entries
+intact). Then, we process each CPU buffer in turn. A CPU switch
+notification is added to the buffer first (for
+<code class="option">--separate=cpu</code> support). Then the processing of the
+actual data starts.
+</p>
+          <p>
+As mentioned, the CPU buffer consists of task switch entries and the
+actual samples. When the routine <code class="function">sync_buffer()</code> sees
+a task switch, the process ID and process group ID are recorded into the
+event buffer, along with a dcookie (see below) identifying the
+application binary (e.g. <code class="filename">/bin/bash</code>). The
+<code class="varname">mmap_sem</code> for the task is then taken, to allow safe
+iteration across the task's list of mapped areas. Each sample is then
+processed as described in the next section.
+</p>
+          <p>
+After a buffer has been read, the tail iterator is updated to reflect
+how much of the buffer was processed. Note that when we determined how
+much data there was to read in the CPU buffer, we also called
+<code class="function">cpu_buffer_reset()</code> to reset
+<code class="varname">last_task</code> and <code class="varname">last_is_kernel</code>, as
+we've already mentioned. During the processing, more samples may have
+been arriving in the CPU buffer; this is OK because we are careful to
+only update the tail iterator to how much we actually read - on the next
+buffer synchronisation, we will start again from that point.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="dentry-cookies"></a>6. Identifying binary images</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+In order to produce useful profiles, we need to be able to associate a
+particular PC value sample with an actual ELF binary on the disk. This
+leaves us with the problem of how to export this information to
+user-space. We create unique IDs that identify a particular directory
+entry (dentry), and write those IDs into the event buffer. Later on,
+the user-space daemon can call the <code class="function">lookup_dcookie</code>
+system call, which looks up the ID and fills in the full path of
+the binary image in the buffer user-space passes in. These IDs are
+maintained by the code in <code class="filename">fs/dcookies.c</code>; the
+cache lasts for as long as the daemon has the event buffer open.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="finding-dentry"></a>7. Finding a sample's binary image and offset</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+We haven't yet described how we process the absolute PC value into
+something usable by the user-space daemon. When we find a sample entered
+into the CPU buffer, we traverse the list of mappings for the task
+(remember, we will have seen a task switch earlier, so we know which
+task's lists to look at). When a mapping is found that contains the PC
+value, we look up the mapped file's dentry in the dcookie cache. This
+gives the dcookie ID that will uniquely identify the mapped file. Then
+we alter the absolute value such that it is an offset from the start of
+the file being mapped (the mapping need not start at the start of the
+actual file, so we have to consider the offset value of the mapping). We
+store this dcookie ID into the event buffer; this identifies which
+binary the samples following it are against.
+In this manner, we have converted a PC value, which has transitory
+meaning only, into a static offset value for later processing by the
+daemon.
+</p>
+          <p>
+We also attempt to avoid the relatively expensive lookup of the dentry
+cookie value by storing the cookie value directly into the dentry
+itself; then we can simply derive the cookie value immediately when we
+find the correct mapping.
+</p>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="sample-files"></a>Chapter 4. Generating sample files</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#processing-buffer">1. Processing the buffer</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#handling-kernel-samples">1.1. Handling kernel samples</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#sample-file-generation">2. Locating and creating sample files</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#sample-file-writing">3. Writing data to a sample file</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="processing-buffer"></a>1. Processing the buffer</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Now we can move on to user-space in our description of how raw interrupt
+samples are processed into useful information. As we described in
+previous sections, the kernel OProfile driver creates a large buffer of
+sample data consisting of offset values, interspersed with
+notification of changes in context. These context changes indicate how
+following samples should be attributed, and include task switches, CPU
+changes, and which dcookie the sample value is against. By processing
+this buffer entry-by-entry, we can determine where the samples should
+be accredited to. This is particularly important when using the
+<code class="option">--separate</code> option.
+</p>
+          <p>
+The file <code class="filename">daemon/opd_trans.c</code> contains the basic routine
+for the buffer processing. The <code class="varname">struct transient</code>
+structure is used to hold changes in context. Its members are modified
+as we process each entry; it is passed into the routines in
+<code class="filename">daemon/opd_sfile.c</code> for actually logging the sample
+to a particular sample file (which will be held in
+<code class="filename">/var/lib/oprofile/samples/current</code>).
+</p>
+          <p>
+The buffer format is designed for conciseness, as high sampling rates
+can easily generate a lot of data. Thus, context changes are prefixed
+by an escape code, identified by <code class="function">is_escape_code()</code>.
+If an escape code is found, the next entry in the buffer identifies
+what type of context change is being read. These are handed off to
+various handlers (see the <code class="varname">handlers</code> array), which
+modify the transient structure as appropriate. If it's not an escape
+code, then it must be a PC offset value, and the very next entry will
+be the numeric hardware counter. These values are read and recorded
+in the transient structure; we then do a lookup to find the correct
+sample file, and log the sample, as described in the next section.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="handling-kernel-samples"></a>1.1. Handling kernel samples</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Samples from kernel code require a little special handling. Because
+the binary text which the sample is against does not correspond to
+any file that the kernel directly knows about, the OProfile driver
+stores the absolute PC value in the buffer, instead of the file offset.
+Of course, we need an offset against some particular binary. To handle
+this, we keep a list of loaded modules by parsing
+<code class="filename">/proc/modules</code> as needed. When a module is loaded,
+a notification is placed in the OProfile buffer, and this triggers a
+re-read. We store the module name, and the loading address and size.
+This is also done for the main kernel image, as specified by the user.
+The absolute PC value is matched against each address range, and
+modified into an offset when the matching module is found. See 
+<code class="filename">daemon/opd_kernel.c</code> for the details.
+</p>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="sample-file-generation"></a>2. Locating and creating sample files</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+We have a sample value and its satellite data stored in a
+<code class="varname">struct transient</code>, and we must locate an
+actual sample file to store the sample in, using the context
+information in the transient structure as a key. The transient data to
+sample file lookup is handled in
+<code class="filename">daemon/opd_sfile.c</code>. A hash is taken of the
+transient values that are relevant (depending upon the setting of
+<code class="option">--separate</code>, some values might be irrelevant), and the
+hash value is used to look up the list of currently open sample files.
+Of course, the sample file might not be found, in which case we need
+to create and open it.
+</p>
+          <p>
+OProfile uses a rather complex scheme for naming sample files, in order
+to make selecting relevant sample files easier for the post-profiling
+utilities. The exact details of the scheme are given in
+<code class="filename">oprofile-tests/pp_interface</code>, but for now it will
+suffice to remember that the filename will include only relevant
+information for the current settings, taken from the transient data. A
+fully-specified filename looks something like :
+</p>
+          <code class="computeroutput">
+/var/lib/oprofile/samples/current/{root}/usr/bin/xmms/{dep}/{root}/lib/tls/libc-2.3.2.so/CPU_CLK_UNHALTED.100000.0.28082.28089.0
+</code>
+          <p>
+It should be clear that this identifies such information as the
+application binary, the dependent (library) binary, the hardware event,
+and the process and thread ID. Typically, not all this information is
+needed, in which case some values may be replaced with the token
+<code class="filename">all</code>.
+</p>
+          <p>
+The code that generates this filename and opens the file is found in
+<code class="filename">daemon/opd_mangling.c</code>. You may have realised that
+at this point, we do not have the binary image file names, only the
+dcookie values. In order to determine a file name, a dcookie value is
+looked up in the dcookie cache. This is to be found in
+<code class="filename">daemon/opd_cookie.c</code>. Since dcookies are both
+persistent and unique during a sampling session, we can cache the
+values. If the value is not found in the cache, then we ask the kernel
+to do the lookup from value to file name for us by calling
+<code class="function">lookup_dcookie()</code>. This looks up the value in a
+kernel-side cache (see <code class="filename">fs/dcookies.c</code>) and returns
+the fully-qualified file name to userspace.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="sample-file-writing"></a>3. Writing data to a sample file</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Each specific sample file is a hashed collection, where the key is
+the PC offset from the transient data, and the value is the number of
+samples recorded against that offset. The files are
+<code class="function">mmap()</code>ed into the daemon's memory space. The code
+to actually log the write against the sample file can be found in
+<code class="filename">libdb/</code>.
+</p>
+          <p>
+For recording stack traces, we have a more complicated sample filename
+mangling scheme that allows us to identify cross-binary calls. We use
+the same sample file format, where the key is a 64-bit value composed
+from the from,to pair of offsets.
+</p>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="output"></a>Chapter 5. Generating useful output</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#profile-specification">1. Handling the profile specification</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#sample-file-collating">2. Collating the candidate sample files</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#sample-file-classifying">2.1. Classifying sample files</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#sample-file-inverting">2.2. Creating inverted profile lists</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#generating-profile-data">3. Generating profile data</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#bfd">3.1. Processing the binary image</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#processing-sample-files">3.2. Processing the sample files</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#generating-output">4. Generating output</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <p>
+All of the tools used to generate human-readable output have to take
+roughly the same steps to collect the data for processing. First, the
+profile specification given by the user has to be parsed. Next, a list
+of sample files matching the specification has to be obtained. Using this
+list, we need to locate the binary file for each sample file, and then
+use them to extract meaningful data, before a final collation and
+presentation to the user.
+</p>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="profile-specification"></a>1. Handling the profile specification</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+The profile specification presented by the user is parsed in
+the function <code class="function">profile_spec::create()</code>. This
+creates an object representing the specification. Then we
+use <code class="function">profile_spec::generate_file_list()</code>
+to search for all sample files and match them against the
+<code class="varname">profile_spec</code>.
+</p>
+          <p>
+To enable this matching process to work, the attributes of
+each sample file are encoded in its filename. This is a low-tech
+approach to matching specifications against candidate sample
+files, but it works reasonably well. Typical sample file names
+might look like these:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/{cg}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.all.all.all
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.all.all.all
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.7423.7424.0
+/var/lib/oprofile/samples/current/{kern}/r128/{dep}/{kern}/r128/CPU_CLK_UNHALTED.100000.0.all.all.all
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+This looks unnecessarily complex, but it's actually fairly simple. First
+we have the session of the sample, here
+<code class="filename">/var/lib/oprofile/samples/current</code>. This could
+equally well be inside an archive from <span><strong class="command">oparchive</strong></span>.
+Next we have one of the tokens <code class="filename">{root}</code> or
+<code class="filename">{kern}</code>. <code class="filename">{root}</code> indicates
+that the binary is found on a file system, and we will encode its path
+in the next section (e.g. <code class="filename">/bin/ls</code>).
+<code class="filename">{kern}</code> indicates a kernel module - on 2.6 kernels
+the path information is not available from the kernel, so we have to
+special-case kernel modules like this; we encode merely the name of the
+module as loaded.
+</p>
+          <p>
+Next there is a <code class="filename">{dep}</code> token, indicating another
+token/path which identifies the dependent binary image. This is used even for
+the "primary" binary (i.e. the one that was
+<code class="function">execve()</code>d), as it simplifies processing. Finally,
+if this sample file is a normal flat profile, the actual file is next in
+the path. If it's a call-graph sample file, we need one further
+specification, to allow us to identify cross-binary arcs in the call
+graph.
+</p>
+          <p>
+The actual sample file name is dot-separated, where the fields are, in
+order: event name, event count, unit mask, task group ID, task ID, and
+CPU number.
+</p>
+          <p>
+This sample file can be reliably parsed (with
+<code class="function">parse_filename()</code>) into a
+<code class="varname">filename_spec</code>. Finally, we can check whether to
+include the sample file in the final results by comparing this
+<code class="varname">filename_spec</code> against the
+<code class="varname">profile_spec</code> the user specified (for the interested,
+see <code class="function">valid_candidate()</code> and
+<code class="function">profile_spec::match</code>). Then comes the really
+complicated bit...
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="sample-file-collating"></a>2. Collating the candidate sample files</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+At this point we have a duplicate-free list of sample files we need
+to process. But first we need to do some further arrangement: we
+need to classify each sample file, and we may also need to "invert"
+the profiles.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="sample-file-classifying"></a>2.1. Classifying sample files</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+It's possible for utilities like <span><strong class="command">opreport</strong></span> to show 
+data in columnar format: for example, we might want to show the results
+of two threads within a process side-by-side. To do this, we need
+to classify each sample file into classes - the classes correspond
+with each <span><strong class="command">opreport</strong></span> column. The function that handles
+this is <code class="function">arrange_profiles()</code>. Each sample file
+is added to a particular class. If the sample file is the first in
+its class, a template is generated from the sample file. Each template
+describes a particular class (thus, in our example above, each template
+will have a different thread ID, and this uniquely identifies each
+class).
+</p>
+            <p>
+Each class has a list of "profile sets" matching that class's template.
+A profile set is either a profile of the primary binary image, or any of
+its dependent images. After all sample files have been listed in one of
+the profile sets belonging to the classes, we have to name each class and
+perform error-checking. This is done by
+<code class="function">identify_classes()</code>; each class is checked to ensure
+that its "axis" is the same as all the others. This is needed because
+<span><strong class="command">opreport</strong></span> can't produce results in 3D format: we can
+only differ in one aspect, such as thread ID or event name.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="sample-file-inverting"></a>2.2. Creating inverted profile lists</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Remember that if we're using certain profile separation options, such as
+"--separate=lib", a single binary could be a dependent image to many
+different binaries. For example, the C library image would be a
+dependent image for most programs that have been profiled. As it
+happens, this can cause severe performance problems: without some
+re-arrangement, these dependent binary images would be opened each
+time we need to process sample files for each program.
+</p>
+            <p>
+The solution is to "invert" the profiles via
+<code class="function">invert_profiles()</code>. We create a new data structure
+where the dependent binary is first, and the primary binary images using
+that dependent binary are listed as sub-images. This helps our
+performance problem, as now we only need to open each dependent image
+once, when we process the list of inverted profiles.
+</p>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="generating-profile-data"></a>3. Generating profile data</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Things don't get any simpler at this point, unfortunately. At this point
+we've collected and classified the sample files into the set of inverted
+profiles, as described in the previous section. Now we need to process
+each inverted profile and make something of the data. The entry point
+for this is <code class="function">populate_for_image()</code>.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="bfd"></a>3.1. Processing the binary image</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The first thing we do with an inverted profile is attempt to open the
+binary image (remember each inverted profile set is only for one binary
+image, but may have many sample files to process). The
+<code class="varname">op_bfd</code> class provides an abstracted interface to
+this; internally it uses <code class="filename">libbfd</code>. The main purpose
+of this class is to process the symbols for the binary image; this is
+also where symbol filtering happens. This is actually quite tricky, but
+should be clear from the source.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="processing-sample-files"></a>3.2. Processing the sample files</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The class <code class="varname">profile_container</code> is a hold-all that
+contains all the processed results. It is a container of
+<code class="varname">profile_t</code> objects. The
+<code class="function">add_sample_files()</code> method uses
+<code class="filename">libdb</code> to open the given sample file and add the
+key/value types to the <code class="varname">profile_t</code>. Once this has been
+done, <code class="function">profile_container::add()</code> is passed the
+<code class="varname">profile_t</code> plus the <code class="varname">op_bfd</code> for
+processing.
+</p>
+            <p>
+<code class="function">profile_container::add()</code> walks through the symbols
+collected in the <code class="varname">op_bfd</code>.
+<code class="function">op_bfd::get_symbol_range()</code> gives us the start and
+end of the symbol as an offset from the start of the binary image,
+then we interrogate the <code class="varname">profile_t</code> for the relevant samples
+for that offset range. We create a <code class="varname">symbol_entry</code>
+object for this symbol and fill it in. If needed, here we also collect
+debug information from the <code class="varname">op_bfd</code>, and possibly
+record the detailed sample information (as used by <span><strong class="command">opreport
+-d</strong></span> and <span><strong class="command">opannotate</strong></span>).
+Finally the <code class="varname">symbol_entry</code> is added to
+a private container of <code class="varname">profile_container</code> - this
+<code class="varname">symbol_container</code> holds all such processed symbols.
+</p>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="generating-output"></a>4. Generating output</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+After the processing described in the previous section, we've now got
+full details of what we need to output stored in the
+<code class="varname">profile_container</code> on a symbol-by-symbol basis. To
+produce output, we need to replay that data and format it suitably.
+</p>
+          <p>
+<span><strong class="command">opreport</strong></span> first asks the
+<code class="varname">profile_container</code> for a
+<code class="varname">symbol_collection</code> (this is also where thresholding
+happens).
+This is sorted, then an
+<code class="varname">opreport_formatter</code> is initialised.
+This object initialises a set of field formatters as requested. Then
+<code class="function">opreport_formatter::output()</code> is called. This
+iterates through the (sorted) <code class="varname">symbol_collection</code>;
+for each entry, the selected fields (as set by the
+<code class="varname">format_flags</code> options) are output by calling the
+field formatters, with the <code class="varname">symbol_entry</code> passed in.
+</p>
+        </div>
+      </div>
+      <div class="glossary">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="glossary"></a>Glossary of OProfile source concepts and types</h2>
+            </div>
+          </div>
+        </div>
+        <dl>
+          <dt>application image</dt>
+          <dd>
+            <p>
+The primary binary image used by an application. This is derived
+from the kernel and corresponds to the binary started upon running
+an application: for example, <code class="filename">/bin/bash</code>.
+</p>
+          </dd>
+          <dt>binary image</dt>
+          <dd>
+            <p>
+An ELF file containing executable code: this includes kernel modules,
+the kernel itself (a.k.a. <code class="filename">vmlinux</code>), shared libraries,
+and application binaries.
+</p>
+          </dd>
+          <dt>dcookie</dt>
+          <dd>
+            <p>
+Short for "dentry cookie". A unique ID that can be looked up to provide
+the full path name of a binary image.
+</p>
+          </dd>
+          <dt>dependent image</dt>
+          <dd>
+            <p>
+A binary image that is dependent upon an application, used with
+per-application separation. Most commonly, shared libraries. For example,
+if <code class="filename">/bin/bash</code> is running and we take
+some samples inside the C library itself due to <span><strong class="command">bash</strong></span>
+calling library code, then the image <code class="filename">/lib/libc.so</code>
+would be dependent upon <code class="filename">/bin/bash</code>.
+</p>
+          </dd>
+          <dt>merging</dt>
+          <dd>
+            <p>
+This refers to the ability to merge several distinct sample files
+into one set of data at runtime, in the post-profiling tools. For example,
+per-thread sample files can be merged into one set of data, because
+they are compatible (i.e. the aggregation of the data is meaningful),
+but it's not possible to merge sample files for two different events,
+because there would be no useful meaning to the results.
+</p>
+          </dd>
+          <dt>profile class</dt>
+          <dd>
+            <p>
+A collection of profile data that has been collected under the same
+class template. For example, if we're using <span><strong class="command">opreport</strong></span>
+to show results after profiling with two performance counters enabled,
+profiling <code class="constant">DATA_MEM_REFS</code> and <code class="constant">CPU_CLK_UNHALTED</code>,
+there would be two profile classes, one for each event. Or if we're on
+an SMP system and doing per-cpu profiling, and we request
+<span><strong class="command">opreport</strong></span> to show results for each CPU side-by-side,
+there would be a profile class for each CPU.
+</p>
+          </dd>
+          <dt>profile specification</dt>
+          <dd>
+            <p>
+The parameters the user passes to the post-profiling tools that limit
+what sample files are used. This specification is matched against
+the available sample files to generate a selection of profile data.
+</p>
+          </dd>
+          <dt>profile template</dt>
+          <dd>
+            <p>
+The parameters that define what goes in a particular profile class.
+This includes a symbolic name (e.g. "cpu:1") and the code-usable
+equivalent.
+</p>
+          </dd>
+        </dl>
+      </div>
+    </div>
+  </body>
+</html>
diff --git a/doc/internals.xml b/doc/internals.xml
new file mode 100644
index 0000000..d2e89f4
--- /dev/null
+++ b/doc/internals.xml
@@ -0,0 +1,972 @@
+<?xml version="1.0" encoding='ISO-8859-1'?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
+
+<book id="oprofile-internals">
+<bookinfo>
+	<title>OProfile Internals</title>
+ 
+	<authorgroup>
+		<author>
+			<firstname>John</firstname>
+			<surname>Levon</surname>
+			<affiliation>
+				<address><email>levon@movementarian.org</email></address>
+			</affiliation>
+		</author>
+	</authorgroup>
+
+	<copyright>
+		<year>2003</year>
+		<holder>John Levon</holder>
+	</copyright>
+</bookinfo>
+
+<toc></toc>
+
+<chapter id="introduction">
+<title>Introduction</title>
+
+<para>
+This document is current for OProfile version <oprofileversion />.
+This document provides some details on the internal workings of OProfile for the
+interested hacker. This document assumes strong C, working C++, plus some knowledge of
+kernel internals and CPU hardware.
+</para>
+<note>
+<para>
+Only the "new" implementation associated with kernel 2.6 and above is covered here. 2.4
+uses a very different kernel module implementation and daemon to produce the sample files.
+</para>
+</note>
+
+<sect1 id="overview">
+<title>Overview</title>
+<para>
+OProfile is a statistical continuous profiler. In other words, profiles are generated by
+regularly sampling the current registers on each CPU (from an interrupt handler, the
+saved PC value at the time of interrupt is stored), and converting that runtime PC
+value into something meaningful to the programmer.
+</para>
+<para>
+OProfile achieves this by taking the stream of sampled PC values, along with the detail
+of which task was running at the time of the interrupt, and converting into a file offset
+against a particular binary file. Because applications <function>mmap()</function>
+the code they run (be it <filename>/bin/bash</filename>, <filename>/lib/libfoo.so</filename>
+or whatever), it's possible to find the relevant binary file and offset by walking
+the task's list of mapped memory areas. Each PC value is thus converted into a tuple
+of binary-image,offset. This is something that the userspace tools can use directly
+to reconstruct where the code came from, including the particular assembly instructions,
+symbol, and source line (via the binary's debug information if present).
+</para>
+<para>
+Regularly sampling the PC value like this approximates what actually was executed and
+how often - more often than not, this statistical approximation is good enough to
+reflect reality. In common operation, the time between each sample interrupt is regulated
+by a fixed number of clock cycles. This implies that the results will reflect where
+the CPU is spending the most time; this is obviously a very useful information source
+for performance analysis.
+</para>
+<para>
+Sometimes though, an application programmer needs different kinds of information: for example,
+"which of the source routines cause the most cache misses ?". The rise in importance of
+such metrics in recent years has led many CPU manufacturers to provide hardware performance
+counters capable of measuring these events on the hardware level. Typically, these counters
+increment once per event, and generate an interrupt on reaching some pre-defined
+number of events. OProfile can use these interrupts to generate samples: then, the
+profile results are a statistical approximation of which code caused how many of the
+given event.
+</para>
+<para>
+Consider a simplified system that only executes two functions A and B. A
+takes one cycle to execute, whereas B takes 99 cycles. Imagine we run at
+100 cycles a second, and we've set the performance counter to create an
+interrupt after a set number of "events" (in this case an event is one
+clock cycle). It should be clear that the chance of the interrupt
+occurring in function A is 1/100, and 99/100 for function B. Thus, we
+statistically approximate the actual relative performance features of
+the two functions over time. This same analysis works for other types of
+events, providing that the interrupt is tied to the number of events
+occurring (that is, after N events, an interrupt is generated).
+</para>
+<para>
+There is typically more than one of these counters, so it's possible to set up profiling
+for several different event types. Using these counters gives us a powerful, low-overhead
+way of gaining performance metrics. If OProfile, or the CPU, does not support performance
+counters, then a simpler method is used: the kernel timer interrupt feeds samples
+into OProfile itself.
+</para>
+<para>
+The rest of this document concerns itself with how we get from receiving samples at
+interrupt time to producing user-readable profile information.
+</para>
+</sect1>
+
+<sect1 id="components">
+<title>Components of the OProfile system</title>
+
+<sect2 id="arch-specific-components">
+<title>Architecture-specific components</title>
+<para>
+If OProfile supports the hardware performance counters found on
+a particular architecture, code for managing the details of setting
+up and managing these counters can be found in the kernel source
+tree in the relevant <filename>arch/<emphasis>arch</emphasis>/oprofile/</filename>
+directory. The architecture-specific implementation works via
+filling in the oprofile_operations structure at init time. This
+provides a set of operations such as <function>setup()</function>,
+<function>start()</function>, <function>stop()</function>, etc.
+that manage the hardware-specific details of fiddling with the
+performance counter registers.
+</para>
+<para>
+The other important facility available to the architecture code is
+<function>oprofile_add_sample()</function>.  This is where a particular sample
+taken at interrupt time is fed into the generic OProfile driver code.
+</para>
+</sect2>
+
+<sect2 id="filesystem">
+<title>oprofilefs</title>
+<para>
+OProfile implements a pseudo-filesystem known as "oprofilefs", mounted from
+userspace at <filename>/dev/oprofile</filename>. This consists of small
+files for reporting and receiving configuration from userspace, as well
+as the actual character device that the OProfile userspace receives samples
+from. At <function>setup()</function> time, the architecture-specific code may
+add further configuration files related to the details of the performance
+counters. For example, on x86, one numbered directory for each hardware
+performance counter is added, with files in each for the event type,
+reset value, etc.
+</para>
+<para>
+The filesystem also contains a <filename>stats</filename> directory with
+a number of useful counters for various OProfile events.
+</para>
+</sect2>
+
+<sect2 id="driver">
+<title>Generic kernel driver</title>
+<para>
+This lives in <filename>drivers/oprofile/</filename>, and forms the core of
+how OProfile works in the kernel. Its job is to take samples delivered
+from the architecture-specific code (via <function>oprofile_add_sample()</function>),
+and buffer this data, in a transformed form as described later, until releasing
+the data to the userspace daemon via the <filename>/dev/oprofile/buffer</filename>
+character device.
+</para>
+</sect2>
+
+<sect2 id="daemon">
+<title>The OProfile daemon</title>
+<para>
+The OProfile userspace daemon's job is to take the raw data provided by the
+kernel and write it to the disk. It takes the single data stream from the
+kernel and logs sample data against a number of sample files (found in
+<filename>/var/lib/oprofile/samples/current/</filename>). For the benefit
+of the "separate" functionality, the names/paths of these sample files
+are mangled to reflect where the samples were from: this can include
+thread IDs, the binary file path, the event type used, and more.
+</para>
+<para>
+After this final step from interrupt to disk file, the data is now
+persistent (that is, changes in the running of the system do not invalidate
+stored data). So the post-profiling tools can run on this data at any
+time (assuming the original binary files are still available and unchanged,
+naturally).
+</para>
+</sect2>
+
+<sect2 id="post-profiling">
+<title>Post-profiling tools</title>
+<para>So far, we've collected data, but we've yet to present it in a useful form
+to the user. This is the job of the post-profiling tools. In general form,
+they collate a subset of the available sample files, load and process each one
+correlated against the relevant binary file, and finally produce user-readable
+information.</para>
+</sect2>
+
+</sect1>
+
+</chapter>
+
+<chapter id="performance-counters">
+<title>Performance counter management</title>
+
+<sect1 id ="performance-counters-ui">
+<title>Providing a user interface</title>
+
+<para>
+The performance counter registers need programming in order to set the
+type of event to count, etc. OProfile uses a standard model across all
+CPUs for defining these events as follows :
+</para>
+<informaltable frame="all">
+<tgroup cols='2'> 
+<tbody>
+<row><entry><option>event</option></entry><entry>The event type e.g. DATA_MEM_REFS</entry></row>
+<row><entry><option>unit mask</option></entry><entry>The sub-events to count (more detailed specification)</entry></row>
+<row><entry><option>counter</option></entry><entry>The hardware counter(s) that can count this event</entry></row>
+<row><entry><option>count</option></entry><entry>The reset value (how many events before an interrupt)</entry></row>
+<row><entry><option>kernel</option></entry><entry>Whether the counter should increment when in kernel space</entry></row>
+<row><entry><option>user</option></entry><entry>Whether the counter should increment when in user space</entry></row>
+</tbody>
+</tgroup>
+</informaltable>
+<para>
+The term "unit mask" is borrowed from the Intel architectures, and can
+further specify exactly when a counter is incremented (for example,
+cache-related events can be restricted to particular state transitions
+of the cache lines).
+</para>
+<para>
+All of the available hardware events and their details are specified in
+the textual files in the <filename>events</filename> directory. The
+syntax of these files should be fairly obvious. The user specifies the
+names and configuration details of the chosen counters via
+<command>opcontrol</command>. These are then written to the kernel
+module (in numerical form) via <filename>/dev/oprofile/N/</filename>
+where N is the physical hardware counter (some events can only be used
+on specific counters; OProfile hides these details from the user when
+possible). On IA64, the perfmon-based interface behaves somewhat
+differently, as described later.
+</para>
+
+</sect1>
+
+<sect1 id="performance-counters-programming">
+<title>Programming the performance counter registers</title>
+
+<para>
+We have described how the user interface fills in the desired
+configuration of the counters and transmits the information to the
+kernel. It is the job of the <function>-&gt;setup()</function> method
+to actually program the performance counter registers. Clearly, the
+details of how this is done are architecture-specific; they are also
+model-specific on many architectures. For example, i386 provides methods
+for each model type that program the counter registers correctly
+(see the <filename>op_model_*</filename> files in
+<filename>arch/i386/oprofile</filename> for the details). The method
+reads the values stored in the virtual oprofilefs files and programs
+the registers appropriately, ready for starting the actual profiling
+session.
+</para>
+<para>
+The architecture-specific drivers make sure to save the old register
+settings before doing OProfile setup. They are restored when OProfile
+shuts down. This is useful, for example, on i386, where the NMI watchdog
+uses the same performance counter registers as OProfile; they cannot
+run concurrently, but OProfile makes sure to restore the setup it found
+before it was running.
+</para>
+<para>
+In addition to programming the counter registers themselves, other setup
+is often necessary. For example, on i386, the local APIC needs
+programming in order to make the counter's overflow interrupt appear as
+an NMI (non-maskable interrupt). This allows sampling (and therefore
+profiling) of regions where "normal" interrupts are masked, enabling
+more reliable profiles.
+</para>
+
+<sect2 id="performance-counters-start">
+<title>Starting and stopping the counters</title>
+<para>
+Initiating a profiling session is done via writing an ASCII '1'
+to the file <filename>/dev/oprofile/enable</filename>. This sets up the
+core, and calls into the architecture-specific driver to actually
+enable each configured counter. Again, the details of how this is
+done are model-specific (for example, the Athlon models can disable
+or enable on a per-counter basis, unlike the PPro models).
+</para>
+</sect2>
+
+<sect2>
+<title>IA64 and perfmon</title>
+<para>
+The IA64 architecture provides a different interface from the other
+architectures, using the existing perfmon driver. Register programming
+is handled entirely in user-space (see
+<filename>daemon/opd_perfmon.c</filename> for the details). A process
+is forked for each CPU, which creates a perfmon context and sets the
+counter registers appropriately via the
+<function>sys_perfmonctl</function> interface. In addition, the actual
+initiation and termination of the profiling session is handled via the
+same interface using <constant>PFM_START</constant> and
+<constant>PFM_STOP</constant>. On IA64, then, there are no oprofilefs
+files for the performance counters, as the kernel driver does not
+program the registers itself.
+</para>
+<para>
+Instead, the perfmon driver for OProfile simply registers with the
+OProfile core with an OProfile-specific UUID. During a profiling
+session, the perfmon core calls into the OProfile perfmon driver and
+samples are registered with the OProfile core itself as usual (with
+<function>oprofile_add_sample()</function>).
+</para>
+</sect2>
+
+</sect1>
+
+</chapter>
+
+<chapter id="collecting-samples">
+<title>Collecting and processing samples</title>
+
+<sect1 id="receiving-interrupts">
+<title>Receiving interrupts</title>
+<para>
+Naturally, how the overflow interrupts are received is specific
+to the hardware architecture, unless we are in "timer" mode, where the
+logging routine is called directly from the standard kernel timer
+interrupt handler.
+</para>
+<para>
+On the i386 architecture, the local APIC is programmed such that when a
+counter overflows (that is, it receives an event that causes an integer
+overflow of the register value to zero), an NMI is generated. This calls
+into the general handler <function>do_nmi()</function>; because OProfile
+has registered itself as capable of handling NMI interrupts, this will
+call into the OProfile driver code in
+<filename>arch/i386/oprofile</filename>. Here, the saved PC value (the
+CPU saves the register set at the time of interrupt on the stack
+available for inspection) is extracted, and the counters are examined to
+find out which one generated the interrupt. Also determined is whether
+the system was inside kernel or user space at the time of the interrupt.
+These three pieces of information are then forwarded onto the OProfile
+core via <function>oprofile_add_sample()</function>. Finally, the
+counter values are reset to the chosen count value, to ensure another
+interrupt happens after another N events have occurred. Other
+architectures behave in a similar manner.
+</para>
+</sect1>
+ 
+<sect1 id="core-structure">
+<title>Core data structures</title>
+<para>
+Before considering what happens when we log a sample, we shall digress
+for a moment and look at the general structure of the data collection
+system.
+</para>
+<para>
+OProfile maintains a small buffer for storing the logged samples for
+each CPU on the system. Only this buffer is altered when we actually log
+a sample (remember, we may still be in an NMI context, so no locking is
+possible). The buffer is managed by a two-handed system; the "head"
+iterator dictates where the next sample data should be placed in the
+buffer. Of course, overflow of the buffer is possible, in which case
+the sample is discarded.
+</para>
+<para>
+It is critical to remember that at this point, the PC value is an
+absolute value, and is therefore only meaningful in the context of which
+task it was logged against. Thus, these per-CPU buffers also maintain
+details of which task each logged sample is for, as described in the
+next section. In addition, we store whether the sample was in kernel
+space or user space (on some architectures and configurations, the address
+space is not sub-divided neatly at a specific PC value, so we must store
+this information).
+</para>
+<para>
+As well as these small per-CPU buffers, we have a considerably larger
+single buffer. This holds the data that is eventually copied out into
+the OProfile daemon. On certain system events, the per-CPU buffers are
+processed and entered (in mutated form) into the main buffer, known in
+the source as the "event buffer". The "tail" iterator indicates the
+point from which the CPU buffer may be read, up to the position of the "head"
+iterator. This provides an entirely lock-free method for extracting data
+from the CPU buffers. This process is described in detail later in this chapter.
+</para>
+<figure><title>The OProfile buffers</title>
+<graphic fileref="buffers.png" />
+</figure>
+</sect1>
+
+<sect1 id="logging-sample">
+<title>Logging a sample</title>
+<para>
+As mentioned, the sample is logged into the buffer specific to the
+current CPU. The CPU buffer is a simple array of pairs of unsigned long
+values; for a sample, they hold the PC value and the counter for the
+sample. (The counter value is later used to translate back into the relevant
+event type the counter was programmed to count.)
+</para>
+<para>
+In addition to logging the sample itself, we also log task switches.
+This is simply done by storing the address of the last task to log a
+sample on that CPU in a data structure, and writing a task switch entry
+into the buffer if the new value of <function>current()</function> has
+changed. Note that later we will directly de-reference this pointer;
+this imposes certain restrictions on when and how the CPU buffers need
+to be processed.
+</para>
+<para>
+Finally, as mentioned, we log whether we have changed between kernel and
+userspace using a similar method. Both of these variables
+(<varname>last_task</varname> and <varname>last_is_kernel</varname>) are
+reset when the CPU buffer is read.
+</para>
+</sect1>
+
+<sect1 id="logging-stack">
+<title>Logging stack traces</title>
+<para>
+OProfile can also provide statistical samples of call chains (on x86). To
+do this, at sample time, the frame pointer chain is traversed, recording
+the return address for each stack frame. This will only work if the code
+was compiled with frame pointers, but we're careful to abort the
+traversal if the frame pointer appears bad. We store the set of return
+addresses straight into the CPU buffer. Note that, since this traversal
+is keyed off the standard sample interrupt, the number of times a
+function appears in a stack trace is not an indicator of how many times
+the call site was executed: rather, it's related to the number of
+samples we took where that call site was involved. Thus, the results for
+stack traces are not necessarily proportional to the call counts:
+typical programs will have many <function>main()</function> samples.
+</para>
+</sect1>
+
+<sect1 id="synchronising-buffers">
+<title>Synchronising the CPU buffers to the event buffer</title>
+<!-- FIXME: update when percpu patch goes in -->
+<para>
+At some point, we have to process the data in each CPU buffer and enter
+it into the main (event) buffer. The file
+<filename>buffer_sync.c</filename> contains the relevant code. We
+periodically (currently every <constant>HZ</constant>/4 jiffies) start
+the synchronisation process. In addition, we process the buffers on
+certain events, such as an application calling
+<function>munmap()</function>. This is particularly important for
+<function>exit()</function> - because the CPU buffers contain pointers
+to the task structure, if we don't process all the buffers before the
+task is actually destroyed and the task structure freed, then we could
+end up trying to dereference a bogus pointer in one of the CPU buffers.
+</para>
+<para>
+We also add a notification when a kernel module is loaded; this is so
+that user-space can re-read <filename>/proc/modules</filename> to
+determine the load addresses of kernel module text sections. Without
+this notification, samples for a newly-loaded module could get lost or
+be attributed to the wrong module.
+</para>
+<para>
+The synchronisation itself works in the following manner: first, mutual
+exclusion on the event buffer is taken. Remember, we do not need to do
+that for each CPU buffer, as we only read from the tail iterator (whilst
+interrupts might be arriving at the same buffer, they will write to
+the position of the head iterator, leaving previously written entries
+intact). Then, we process each CPU buffer in turn. A CPU switch
+notification is added to the buffer first (for
+<option>--separate=cpu</option> support). Then the processing of the
+actual data starts.
+</para>
+<para>
+As mentioned, the CPU buffer consists of task switch entries and the
+actual samples. When the routine <function>sync_buffer()</function> sees
+a task switch, the process ID and process group ID are recorded into the
+event buffer, along with a dcookie (see below) identifying the
+application binary (e.g. <filename>/bin/bash</filename>). The
+<varname>mmap_sem</varname> for the task is then taken, to allow safe
+iteration across the task's list of mapped areas. Each sample is then
+processed as described in the next section.
+</para>
+<para>
+After a buffer has been read, the tail iterator is updated to reflect
+how much of the buffer was processed. Note that when we determined how
+much data there was to read in the CPU buffer, we also called
+<function>cpu_buffer_reset()</function> to reset
+<varname>last_task</varname> and <varname>last_is_kernel</varname>, as
+we've already mentioned. During the processing, more samples may have
+been arriving in the CPU buffer; this is OK because we are careful to
+only update the tail iterator to how much we actually read - on the next
+buffer synchronisation, we will start again from that point.
+</para>
+</sect1>
+
+<sect1 id="dentry-cookies">
+<title>Identifying binary images</title>
+<para>
+In order to produce useful profiles, we need to be able to associate a
+particular PC value sample with an actual ELF binary on the disk. This
+leaves us with the problem of how to export this information to
+user-space. We create unique IDs that identify a particular directory
+entry (dentry), and write those IDs into the event buffer. Later on,
+the user-space daemon can call the <function>lookup_dcookie</function>
+system call, which looks up the ID and fills in the full path of
+the binary image in the buffer user-space passes in. These IDs are
+maintained by the code in <filename>fs/dcookies.c</filename>; the
+cache lasts for as long as the daemon has the event buffer open.
+</para>
+</sect1>
+
+<sect1 id="finding-dentry">
+<title>Finding a sample's binary image and offset</title>
+<para>
+We haven't yet described how we process the absolute PC value into
+something usable by the user-space daemon. When we find a sample entered
+into the CPU buffer, we traverse the list of mappings for the task
+(remember, we will have seen a task switch earlier, so we know which
+task's lists to look at). When a mapping is found that contains the PC
+value, we look up the mapped file's dentry in the dcookie cache. This
+gives the dcookie ID that will uniquely identify the mapped file. Then
+we alter the absolute value such that it is an offset from the start of
+the file being mapped (the mapping need not start at the start of the
+actual file, so we have to consider the offset value of the mapping). We
+store this dcookie ID into the event buffer; this identifies which
+binary the samples following it are against.
+In this manner, we have converted a PC value, which has transitory
+meaning only, into a static offset value for later processing by the
+daemon.
+</para>
+<para>
+We also attempt to avoid the relatively expensive lookup of the dentry
+cookie value by storing the cookie value directly into the dentry
+itself; then we can simply derive the cookie value immediately when we
+find the correct mapping.
+</para>
+</sect1>
+
+</chapter>
+
+<chapter id="sample-files">
+<title>Generating sample files</title>
+
+<sect1 id="processing-buffer">
+<title>Processing the buffer</title>
+
+<para>
+Now we can move onto user-space in our description of how raw interrupt
+samples are processed into useful information. As we described in
+previous sections, the kernel OProfile driver creates a large buffer of
+sample data consisting of offset values, interspersed with
+notification of changes in context. These context changes indicate how
+following samples should be attributed, and include task switches, CPU
+changes, and which dcookie the sample value is against. By processing
+this buffer entry-by-entry, we can determine where the samples should
+be accredited to. This is particularly important when using the
+<option>--separate</option> option.
+</para>
+<para>
+The file <filename>daemon/opd_trans.c</filename> contains the basic routine
+for the buffer processing. The <varname>struct transient</varname>
+structure is used to hold changes in context. Its members are modified
+as we process each entry; it is passed into the routines in
+<filename>daemon/opd_sfile.c</filename> for actually logging the sample
+to a particular sample file (which will be held in
+<filename>/var/lib/oprofile/samples/current</filename>).
+</para>
+<para>
+The buffer format is designed for conciseness, as high sampling rates
+can easily generate a lot of data. Thus, context changes are prefixed
+by an escape code, identified by <function>is_escape_code()</function>.
+If an escape code is found, the next entry in the buffer identifies
+what type of context change is being read. These are handed off to
+various handlers (see the <varname>handlers</varname> array), which
+modify the transient structure as appropriate. If it's not an escape
+code, then it must be a PC offset value, and the very next entry will
+be the numeric hardware counter. These values are read and recorded
+in the transient structure; we then do a lookup to find the correct
+sample file, and log the sample, as described in the next section.
+</para>
+
+<sect2 id="handling-kernel-samples">
+<title>Handling kernel samples</title>
+
+<para>
+Samples from kernel code require a little special handling. Because
+the binary text which the sample is against does not correspond to
+any file that the kernel directly knows about, the OProfile driver
+stores the absolute PC value in the buffer, instead of the file offset.
+Of course, we need an offset against some particular binary. To handle
+this, we keep a list of loaded modules by parsing
+<filename>/proc/modules</filename> as needed. When a module is loaded,
+a notification is placed in the OProfile buffer, and this triggers a
+re-read. We store the module name, and the loading address and size.
+This is also done for the main kernel image, as specified by the user.
+The absolute PC value is matched against each address range, and
+modified into an offset when the matching module is found. See 
+<filename>daemon/opd_kernel.c</filename> for the details.
+</para>
+
+</sect2>
+
+
+</sect1>
+
+<sect1 id="sample-file-generation">
+<title>Locating and creating sample files</title>
+
+<para>
+We have a sample value and its satellite data stored in a
+<varname>struct transient</varname>, and we must locate an
+actual sample file to store the sample in, using the context
+information in the transient structure as a key. The transient data to
+sample file lookup is handled in
+<filename>daemon/opd_sfile.c</filename>. A hash is taken of the
+transient values that are relevant (depending upon the setting of
+<option>--separate</option>, some values might be irrelevant), and the
+hash value is used to search the list of currently open sample files.
+Of course, the sample file might not be found, in which case we need
+to create and open it.
+</para>
+<para>
+OProfile uses a rather complex scheme for naming sample files, in order
+to make selecting relevant sample files easier for the post-profiling
+utilities. The exact details of the scheme are given in
+<filename>oprofile-tests/pp_interface</filename>, but for now it will
+suffice to remember that the filename will include only relevant
+information for the current settings, taken from the transient data. A
+fully-specified filename looks something like :
+</para>
+<screen>
+/var/lib/oprofile/samples/current/{root}/usr/bin/xmms/{dep}/{root}/lib/tls/libc-2.3.2.so/CPU_CLK_UNHALTED.100000.0.28082.28089.0
+</screen>
+<para>
+It should be clear that this identifies such information as the
+application binary, the dependent (library) binary, the hardware event,
+and the process and thread ID. Typically, not all this information is
+needed, in which cases some values may be replaced with the token
+<filename>all</filename>.
+</para>
+<para>
+The code that generates this filename and opens the file is found in
+<filename>daemon/opd_mangling.c</filename>. You may have realised that
+at this point, we do not have the binary image file names, only the
+dcookie values. In order to determine a file name, a dcookie value is
+looked up in the dcookie cache. This is to be found in
+<filename>daemon/opd_cookie.c</filename>. Since dcookies are both
+persistent and unique during a sampling session, we can cache the
+values. If the value is not found in the cache, then we ask the kernel
+to do the lookup from value to file name for us by calling
+<function>lookup_dcookie()</function>. This looks up the value in a
+kernel-side cache (see <filename>fs/dcookies.c</filename>) and returns
+the fully-qualified file name to userspace.
+</para>
+
+</sect1>
+
+<sect1 id="sample-file-writing">
+<title>Writing data to a sample file</title>
+
+<para>
+Each specific sample file is a hashed collection, where the key is
+the PC offset from the transient data, and the value is the number of
+samples recorded against that offset. The files are
+<function>mmap()</function>ed into the daemon's memory space. The code
+to actually log the write against the sample file can be found in
+<filename>libdb/</filename>.
+</para>
+<para>
+For recording stack traces, we have a more complicated sample filename
+mangling scheme that allows us to identify cross-binary calls. We use
+the same sample file format, where the key is a 64-bit value composed
+from the from,to pair of offsets.
+</para>
+
+</sect1>
+
+</chapter>
+
+<chapter id="output">
+<title>Generating useful output</title>
+
+<para>
+All of the tools used to generate human-readable output have to take
+roughly the same steps to collect the data for processing. First, the
+profile specification given by the user has to be parsed. Next, a list
+of sample files matching the specification has to be obtained. Using this
+list, we need to locate the binary file for each sample file, and then
+use them to extract meaningful data, before a final collation and
+presentation to the user.
+</para>
+
+<sect1 id="profile-specification">
+<title>Handling the profile specification</title>
+
+<para>
+The profile specification presented by the user is parsed in
+the function <function>profile_spec::create()</function>. This
+creates an object representing the specification. Then we
+use <function>profile_spec::generate_file_list()</function>
+to search for all sample files and match them against the
+<varname>profile_spec</varname>.
+</para>
+
+<para>
+To enable this matching process to work, the attributes of
+each sample file are encoded in its filename. This is a low-tech
+approach to matching specifications against candidate sample
+files, but it works reasonably well. Typical sample file
+names might look like these:
+</para>
+<screen>
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/{cg}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.all.all.all
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.all.all.all
+/var/lib/oprofile/samples/current/{root}/bin/ls/{dep}/{root}/bin/ls/CPU_CLK_UNHALTED.100000.0.7423.7424.0
+/var/lib/oprofile/samples/current/{kern}/r128/{dep}/{kern}/r128/CPU_CLK_UNHALTED.100000.0.all.all.all
+</screen>
+<para>
+This looks unnecessarily complex, but it's actually fairly simple. First
+we have the session of the sample, here
+<filename>/var/lib/oprofile/samples/current</filename>. This could
+equally well be inside an archive from <command>oparchive</command>.
+Next we have one of the tokens <filename>{root}</filename> or
+<filename>{kern}</filename>. <filename>{root}</filename> indicates
+that the binary is found on a file system, and we will encode its path
+in the next section (e.g. <filename>/bin/ls</filename>).
+<filename>{kern}</filename> indicates a kernel module - on 2.6 kernels
+the path information is not available from the kernel, so we have to
+special-case kernel modules like this; we encode merely the name of the
+module as loaded.
+</para>
+<para>
+Next there is a <filename>{dep}</filename> token, indicating another
+token/path which identifies the dependent binary image. This is used even for
+the "primary" binary (i.e. the one that was
+<function>execve()</function>d), as it simplifies processing. Finally,
+if this sample file is a normal flat profile, the actual file is next in
+the path. If it's a call-graph sample file, we need one further
+specification, to allow us to identify cross-binary arcs in the call
+graph.
+</para>
+<para>
+The actual sample file name is dot-separated, where the fields are, in
+order: event name, event count, unit mask, task group ID, task ID, and
+CPU number.
+</para>
+<para>
+This sample file can be reliably parsed (with
+<function>parse_filename()</function>) into a
+<varname>filename_spec</varname>. Finally, we can check whether to
+include the sample file in the final results by comparing this
+<varname>filename_spec</varname> against the
+<varname>profile_spec</varname> the user specified (for the interested,
+see <function>valid_candidate()</function> and
+<function>profile_spec::match</function>). Then comes the really
+complicated bit...
+</para>
+
+</sect1>
+
+<sect1 id="sample-file-collating">
+<title>Collating the candidate sample files</title>
+
+<para>
+At this point we have a duplicate-free list of sample files we need
+to process. But first we need to do some further arrangement: we
+need to classify each sample file, and we may also need to "invert"
+the profiles.
+</para>
+
+<sect2 id="sample-file-classifying">
+<title>Classifying sample files</title>
+
+<para>
+It's possible for utilities like <command>opreport</command> to show 
+data in columnar format: for example, we might want to show the results
+of two threads within a process side-by-side. To do this, we need
+to classify each sample file into classes - the classes correspond
+with each <command>opreport</command> column. The function that handles
+this is <function>arrange_profiles()</function>. Each sample file
+is added to a particular class. If the sample file is the first in
+its class, a template is generated from the sample file. Each template
+describes a particular class (thus, in our example above, each template
+will have a different thread ID, and this uniquely identifies each
+class).
+</para>
+
+<para>
+Each class has a list of "profile sets" matching that class's template.
+A profile set is either a profile of the primary binary image, or any of
+its dependent images. After all sample files have been listed in one of
+the profile sets belonging to the classes, we have to name each class and
+perform error-checking. This is done by
+<function>identify_classes()</function>; each class is checked to ensure
+that its "axis" is the same as all the others. This is needed because
+<command>opreport</command> can't produce results in 3D format: we can
+only differ in one aspect, such as thread ID or event name.
+</para>
+
+</sect2>
+
+<sect2 id="sample-file-inverting">
+<title>Creating inverted profile lists</title>
+
+<para>
+Remember that if we're using certain profile separation options, such as
+"--separate=lib", a single binary could be a dependent image to many
+different binaries. For example, the C library image would be a
+dependent image for most programs that have been profiled. As it
+happens, this can cause severe performance problems: without some
+re-arrangement, these dependent binary images would be opened each
+time we need to process sample files for each program.
+</para>
+
+<para>
+The solution is to "invert" the profiles via
+<function>invert_profiles()</function>. We create a new data structure
+where the dependent binary is first, and the primary binary images using
+that dependent binary are listed as sub-images. This helps our
+performance problem, as now we only need to open each dependent image
+once, when we process the list of inverted profiles.
+</para>
+
+</sect2>
+
+</sect1>
+
+<sect1 id="generating-profile-data">
+<title>Generating profile data</title>
+
+<para>
+Things don't get any simpler at this point, unfortunately. At this point
+we've collected and classified the sample files into the set of inverted
+profiles, as described in the previous section. Now we need to process
+each inverted profile and make something of the data. The entry point
+for this is <function>populate_for_image()</function>.
+</para>
+
+<sect2 id="bfd">
+<title>Processing the binary image</title>
+<para>
+The first thing we do with an inverted profile is attempt to open the
+binary image (remember each inverted profile set is only for one binary
+image, but may have many sample files to process). The
+<varname>op_bfd</varname> class provides an abstracted interface to
+this; internally it uses <filename>libbfd</filename>. The main purpose
+of this class is to process the symbols for the binary image; this is
+also where symbol filtering happens. This is actually quite tricky, but
+should be clear from the source.
+</para>
+</sect2>
+
+<sect2 id="processing-sample-files">
+<title>Processing the sample files</title>
+<para>
+The class <varname>profile_container</varname> is a hold-all that
+contains all the processed results. It is a container of
+<varname>profile_t</varname> objects. The
+<function>add_sample_files()</function> method uses
+<filename>libdb</filename> to open the given sample file and add the
+key/value types to the <varname>profile_t</varname>. Once this has been
+done, <function>profile_container::add()</function> is passed the
+<varname>profile_t</varname> plus the <varname>op_bfd</varname> for
+processing.
+</para>
+<para>
+<function>profile_container::add()</function> walks through the symbols
+collected in the <varname>op_bfd</varname>.
+<function>op_bfd::get_symbol_range()</function> gives us the start and
+end of the symbol as an offset from the start of the binary image,
+then we interrogate the <varname>profile_t</varname> for the relevant samples
+for that offset range. We create a <varname>symbol_entry</varname>
+object for this symbol and fill it in. If needed, here we also collect
+debug information from the <varname>op_bfd</varname>, and possibly
+record the detailed sample information (as used by <command>opreport
+-d</command> and <command>opannotate</command>).
+Finally the <varname>symbol_entry</varname> is added to
+a private container of <varname>profile_container</varname> - this
+<varname>symbol_container</varname> holds all such processed symbols.
+</para>
+</sect2>
+
+</sect1>
+
+<sect1 id="generating-output">
+<title>Generating output</title>
+
+<para>
+After the processing described in the previous section, we've now got
+full details of what we need to output stored in the
+<varname>profile_container</varname> on a symbol-by-symbol basis. To
+produce output, we need to replay that data and format it suitably.
+</para>
+<para>
+<command>opreport</command> first asks the
+<varname>profile_container</varname> for a
+<varname>symbol_collection</varname> (this is also where thresholding
+happens).
+This is sorted, then an
+<varname>opreport_formatter</varname> is initialised.
+This object initialises a set of field formatters as requested. Then
+<function>opreport_formatter::output()</function> is called. This
+iterates through the (sorted) <varname>symbol_collection</varname>;
+for each entry, the selected fields (as set by the
+<varname>format_flags</varname> options) are output by calling the
+field formatters, with the <varname>symbol_entry</varname> passed in.
+</para>
+
+</sect1>
+
+</chapter>
+
+<glossary id="glossary">
+<title>Glossary of OProfile source concepts and types</title>
+
+<glossentry><glossterm>application image</glossterm>
+<glossdef><para>
+The primary binary image used by an application. This is derived
+from the kernel and corresponds to the binary started upon running
+an application: for example, <filename>/bin/bash</filename>.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>binary image</glossterm>
+<glossdef><para>
+An ELF file containing executable code: this includes kernel modules,
+the kernel itself (a.k.a. <filename>vmlinux</filename>), shared libraries,
+and application binaries.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>dcookie</glossterm>
+<glossdef><para>
+Short for "dentry cookie". A unique ID that can be looked up to provide
+the full path name of a binary image.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>dependent image</glossterm>
+<glossdef><para>
+A binary image that is dependent upon an application, used with
+per-application separation. Most commonly, shared libraries. For example,
+if <filename>/bin/bash</filename> is running and we take
+some samples inside the C library itself due to <command>bash</command>
+calling library code, then the image <filename>/lib/libc.so</filename>
+would be dependent upon <filename>/bin/bash</filename>.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>merging</glossterm>
+<glossdef><para>
+This refers to the ability to merge several distinct sample files
+into one set of data at runtime, in the post-profiling tools. For example,
+per-thread sample files can be merged into one set of data, because
+they are compatible (i.e. the aggregation of the data is meaningful),
+but it's not possible to merge sample files for two different events,
+because there would be no useful meaning to the results.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>profile class</glossterm>
+<glossdef><para>
+A collection of profile data that has been collected under the same
+class template. For example, if we're using <command>opreport</command>
+to show results after profiling with two performance counters enabled,
+profiling <constant>DATA_MEM_REFS</constant> and <constant>CPU_CLK_UNHALTED</constant>,
+there would be two profile classes, one for each event. Or if we're on
+an SMP system and doing per-cpu profiling, and we request
+<command>opreport</command> to show results for each CPU side-by-side,
+there would be a profile class for each CPU.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>profile specification</glossterm>
+<glossdef><para>
+The parameters the user passes to the post-profiling tools that limit
+what sample files are used. This specification is matched against
+the available sample files to generate a selection of profile data.
+</para></glossdef></glossentry>
+
+<glossentry><glossterm>profile template</glossterm>
+<glossdef><para>
+The parameters that define what goes in a particular profile class.
+This includes a symbolic name (e.g. "cpu:1") and the code-usable
+equivalent.
+</para></glossdef></glossentry>
+
+</glossary>
+
+</book>
diff --git a/doc/opannotate.1.in b/doc/opannotate.1.in
new file mode 100644
index 0000000..4a5d5d0
--- /dev/null
+++ b/doc/opannotate.1.in
@@ -0,0 +1,111 @@
+.TH OPANNOTATE 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+opannotate \- produce source or assembly annotated with profile data
+.SH SYNOPSIS
+.br
+.B opannotate
+[
+.I options
+]
+[profile specification]
+.SH DESCRIPTION
+
+.B opannotate
+outputs annotated source and/or assembly from profile data of an OProfile
+session.
+
+.SH OPTIONS
+.TP
+.BI "--assembly / -a"
+Output annotated assembly. If this is combined with --source, then mixed
+source / assembly annotations are output.
+.br
+.TP
+.BI "--demangle / -D none|smart|normal"
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+.br
+.TP
+.BI "--exclude-dependent / -x"
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+.br
+.TP
+.BI "--exclude-file [files]"
+Exclude all files in the given comma-separated list of glob patterns.
+.br
+.TP
+.BI "--exclude-symbols / -e [symbols]"
+Exclude all the symbols in the given comma-separated list.
+.br
+.TP
+.BI "--help / -? / --usage"
+Show help message.
+.br
+.TP
+.BI "--image-path / -p [paths]"
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+.br
+.TP
+.BI "--include-file [files]"
+Only include files in the given comma-separated list of glob patterns.
+.br
+.TP
+.BI "--include-symbols / -i [symbols]"
+Only include symbols in the given comma-separated list.
+.br
+.TP
+.BI "--objdump-params [params]"
+Pass the given parameters as extra values when calling objdump.
+.br
+.TP
+.BI "--output-dir / -o [dir]"
+Output directory. This makes opannotate output one annotated file for each
+source file. This option can't be used in conjunction with --assembly.
+.br
+.TP
+.BI "--search-dirs / -d [paths]"
+Comma-separated list of paths to search for source files. You may need to use
+this option when the debug information for an image contains relative paths.
+.br
+.TP
+.BI "--base-dirs / -b [paths]"
+Comma-separated list of paths to strip from debug source files, prior to
+looking for them in --search-dirs.
+.br
+.TP
+.BI "--source / -s"
+Output annotated source. This requires debugging information to be available
+for the binaries.
+.br
+.TP
+.BI "--threshold / -t [percentage]"
+Only output data for symbols that have more than the given percentage
+of total samples.
+.br
+.TP
+.BI "--verbose / -V [options]"
+Give verbose debugging output.
+.br
+.TP
+.BI "--version / -v"
+Show version.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by opannotate.
+
+.SH FILES
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/oparchive.1.in b/doc/oparchive.1.in
new file mode 100644
index 0000000..8b9301c
--- /dev/null
+++ b/doc/oparchive.1.in
@@ -0,0 +1,63 @@
+.TH OPARCHIVE 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+oparchive \- produce archive of oprofile data for offline analysis
+.SH SYNOPSIS
+.br
+.B oparchive
+[
+.I options
+]
+[profile specification]
+.B -o
+[directory]
+.SH DESCRIPTION
+
+.B oparchive
+generates a directory populated with executable, debug, and oprofile sample
+files. This directory can be moved to another machine via tar and analyzed
+without further use of the data collection machine.
+
+.SH OPTIONS
+.TP
+.BI "--help / -? / --usage"
+Show help message.
+.br
+.TP
+.BI "--version / -v"
+Show version.
+.br
+.TP
+.BI "--verbose / -V [options]"
+Give verbose debugging output.
+.br
+.TP
+.BI "--image-path / -p [paths]"
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+.br
+.TP
+.BI "--output-directory / -o [directory]"
+Output to the given directory. There is no default. This must be specified.
+.br
+.TP
+.BI "--exclude-dependent / -x"
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by oparchive.
+
+.SH FILES
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/opcontrol.1.in b/doc/opcontrol.1.in
new file mode 100644
index 0000000..7dcc745
--- /dev/null
+++ b/doc/opcontrol.1.in
@@ -0,0 +1,149 @@
+.TH OPCONTROL 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+opcontrol \- control OProfile profiling
+.SH SYNOPSIS
+.br
+.B opcontrol
+[
+.I options
+]
+.SH DESCRIPTION
+.B opcontrol
+can be used to start profiling, end a profiling session,
+dump profile data, and set up the profiling parameters.
+
+.SH OPTIONS
+.TP
+.BI "--help"
+Show help message.
+.br
+.TP
+.BI "--version"
+Show version.
+.br
+.TP
+.BI "--list-events"
+Shows the monitorable events.
+.br
+.TP
+.BI "--init"
+Load the OProfile module if required and make the OProfile driver
+interface available.
+.br
+.TP
+.BI "--setup"
+Followed by a list of options for profiling setup. Store setup
+in ~root/.oprofile/daemonrc. Optional.
+.br
+.TP
+.BI "--status"
+Show configuration information.
+.br
+.TP
+.BI "--start-daemon"
+Start the oprofile daemon without starting profiling. Not available
+in 2.2/2.4 kernels.
+.br
+.TP
+.BI "--start"
+Start data collection with either arguments provided by --setup
+or information saved in ~root/.oprofile/daemonrc.
+.br
+.TP
+.BI "--dump"
+Force a flush of the collected profiling data to the daemon.
+.br
+.TP
+.BI "--stop"
+Stop data collection. Not available in 2.2/2.4 kernels.
+.br
+.TP
+.BI "--shutdown"
+Stop data collection and kill the daemon.
+.br
+.TP
+.BI "--reset"
+Clear out data from current session, but leave saved sessions.
+.br
+.TP
+.BI "--save="sessionname
+Save data from current session to sessionname.
+.br
+.TP
+.BI "--deinit"
+Shut down daemon. Unload the oprofile module and oprofilefs.
+.br
+.TP
+.BI "--buffer-size="num
+Set kernel buffer to num samples.
+.br
+.TP
+.BI "--cpu-buffer-size="num
+Set kernel per cpu buffer to num samples (2.6 only). If you profile at a high
+rate, it can help to increase this if the log file shows an excessive count of
+samples lost due to cpu buffer overflow.
+.br
+.TP
+.BI "--event="[event|"default"]
+Add an event to measure for the hardware performance counters,
+or "default" for the default event. The event is of the form
+"CPU_CLK_UNHALTED:30000:0:1:1" where the numeric values are
+count, unit mask, kernel-space counting, user-space counting,
+respectively.
+.br
+.TP
+.BI "--separate="[none,lib,kernel,thread,cpu,all]
+Separate samples based on the given separator. 'lib' separates
+dynamically linked library samples per application. 'kernel' separates
+kernel and kernel module samples per application; 'kernel'
+implies 'library'. 'thread' gives separation for each thread and
+task.  'cpu' separates for each CPU. 'all' implies all of the above
+options and 'none' turns off separation.
+.br
+.TP
+.BI "--callgraph=#depth"
+Enable callgraph sample collection with a maximum depth. Use 0 to disable
+callgraph profiling. This option is currently only usable on x86, using a
+2.6+ kernel with callgraph support enabled.
+.br
+.TP
+.BI "--image="[name,name...|"all"]
+Only profile the given absolute paths to binaries, or "all" to profile
+everything (the default).
+.br
+.TP
+.BI "--vmlinux="file
+vmlinux kernel image.
+.br
+.TP
+.BI "--no-vmlinux"
+Use this when you don't have a kernel vmlinux file, and you don't want to
+profile the kernel.
+.br
+.TP
+.BI "--verbose"
+Be verbose in the daemon log. This has a high overhead.
+.br
+.TP
+.BI "--kernel-range="start,end
+Set kernel range vma address in hexadecimal.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by opcontrol.
+
+.SH FILES
+.TP
+.I /root/.oprofile/daemonrc
+Configuration file for opcontrol
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/opgprof.1.in b/doc/opgprof.1.in
new file mode 100644
index 0000000..1909d54
--- /dev/null
+++ b/doc/opgprof.1.in
@@ -0,0 +1,59 @@
+.TH OPGPROF 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+opgprof \- produce gprof-format profile data
+.SH SYNOPSIS
+.br
+.B opgprof
+[
+.I options
+]
+[profile specification]
+.SH DESCRIPTION
+
+.B opgprof
+outputs gprof-format profile data for a given binary image,
+from an OProfile session.
+
+.SH OPTIONS
+.TP
+.BI "--help / -? / --usage"
+Show help message.
+.br
+.TP
+.BI "--version / -v"
+Show version.
+.br
+.TP
+.BI "--verbose / -V [options]"
+Give verbose debugging output.
+.br
+.TP
+.BI "--image-path / -p [paths]"
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+.br
+.TP
+.BI "--threshold / -t [percentage]"
+Only output data for symbols that have more than the given percentage
+of total samples.
+.br
+.TP
+.BI "--output-filename / -o [file]"
+Output to the given file instead of the default, gmon.out.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by opgprof.
+
+.SH FILES
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/ophelp.1.in b/doc/ophelp.1.in
new file mode 100644
index 0000000..f8d6832
--- /dev/null
+++ b/doc/ophelp.1.in
@@ -0,0 +1,54 @@
+.TH OPHELP 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+ophelp \- list OProfile events
+.SH SYNOPSIS
+.br
+.B ophelp
+[
+.I options
+]
+[event name]
+.SH DESCRIPTION
+
+By default,
+.B ophelp
+lists the available performance counter options. If you
+give it a symbolic event name, it will return the hardware
+value (e.g. "ophelp DATA_MEM_REFS").
+
+.SH OPTIONS
+.TP
+.BI "--cpu-type / -c"
+Show the events for the given numerical CPU type.
+.br
+.TP
+.BI "--get-cpu-type / -r"
+Show the symbolic CPU name.
+.br
+.TP
+.BI "--help / -? / --usage"
+Show help message.
+.br
+.TP
+.BI "--version / -v"
+Show version.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by ophelp.
+
+.SH FILES
+.TP
+.I $prefix/share/oprofile/
+Event description files used by OProfile.
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/opreport.1.in b/doc/opreport.1.in
new file mode 100644
index 0000000..035d26c
--- /dev/null
+++ b/doc/opreport.1.in
@@ -0,0 +1,128 @@
+.TH OPREPORT 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+opreport \- produce symbol or binary image summaries
+.SH SYNOPSIS
+.br
+.B opreport
+[
+.I options
+]
+[profile specification]
+.SH DESCRIPTION
+
+.B opreport
+outputs binary image summaries, or per-symbol data, from OProfile profiling
+sessions.
+
+.SH OPTIONS
+.TP
+.BI "--accumulated / -a"
+Accumulate sample and percentage counts in the symbol list.
+.br
+.TP
+.BI "--debug-info / -g"
+Show source file and line for each symbol.
+.br
+.TP
+.BI "--demangle / -D none|smart|normal"
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+.br
+.TP
+.BI "--callgraph / -c"
+Show call graph information if available.
+.br
+.TP
+.BI "--details / -d"
+Show per-instruction details for all selected symbols.
+.br
+.TP
+.BI "--exclude-dependent / -x"
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+.br
+.TP
+.BI "--exclude-symbols / -e [symbols]"
+Exclude all the symbols in the given comma-separated list.
+.br
+.TP
+.BI "--global-percent / -%"
+Make all percentages relative to the whole profile.
+.br
+.TP
+.BI "--help / -? / --usage"
+Show help message.
+.br
+.TP
+.BI "--image-path / -p [paths]"
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+.br
+.TP
+.BI "--include-symbols / -i [symbols]"
+Only include symbols in the given comma-separated list.
+.br
+.TP
+.BI "--long-filenames / -f"
+Output full paths instead of basenames.
+.br
+.TP
+.BI "--merge / -m [lib,cpu,tid,tgid,unitmask,all]"
+Merge any profiles separated in a --separate session.
+.br
+.TP
+.BI "--no-header"
+Don't output a header detailing profiling parameters.
+.br
+.TP
+.BI "--output-file / -o [file]"
+Output to the given file instead of stdout.
+.br
+.TP
+.BI "--reverse-sort / -r"
+Reverse the sort from the default.
+.br
+.TP
+.BI "--show-address / -w"
+Show each symbol's VMA address.
+.br
+.TP
+.BI "--sort / -s [vma,sample,symbol,debug,image]"
+Sort the list of symbols by, respectively, symbol address,
+number of samples, symbol name, debug filename and line number,
+binary image filename.
+.br
+.TP
+.BI "--symbols / -l"
+List per-symbol information instead of a binary image summary.
+.br
+.TP
+.BI "--threshold / -t [percentage]"
+Only output data for symbols that have more than the given percentage
+of total samples.
+.br
+.TP
+.BI "--verbose / -V [options]"
+Give verbose debugging output.
+.br
+.TP
+.BI "--version / -v"
+Show version.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by opreport.
+
+.SH FILES
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR oprofile(1)
diff --git a/doc/oprofile.1 b/doc/oprofile.1
new file mode 100644
index 0000000..d5d7363
--- /dev/null
+++ b/doc/oprofile.1
@@ -0,0 +1,186 @@
+.TH OPROFILE 1 "Mon 18 July 2005" "oprofile 0.9.1"
+.UC 4
+.SH NAME
+oprofile \- a system-wide profiler
+.SH SYNOPSIS
+.br
+.B opcontrol
+[
+.I options
+]
+.br
+.B opreport
+[
+.I options
+]
+[ profile specification ]
+.br
+.B opannotate
+[
+.I options
+]
+[ profile specification ]
+.br
+.B oparchive
+[
+.I options
+]
+[ profile specification ]
+.br
+.B opgprof
+[
+.I options
+]
+[ profile specification ]
+.br
+.SH DESCRIPTION
+OProfile is a profiling system for systems running Linux
+2.2, 2.4, and 2.6. Profiling runs transparently in the background and profile
+data can be collected at any time. OProfile makes use of the hardware
+performance counters provided on Intel, AMD, and other processors,
+and uses a timer-interrupt based mechanism on CPUs without counters.
+OProfile can profile the whole system in high detail.
+.br
+For a gentle guide to using OProfile, please read the HTML documentation
+listed in SEE ALSO.
+.br
+.SH OPCONTROL
+.B opcontrol
+is used for starting and stopping the OProfile daemon, and providing set-up
+parameters.
+.SH OPREPORT
+.B opreport
+gives image and symbol-based profile summaries for the whole system or
+a subset of binary images.
+.SH OPANNOTATE
+.B opannotate
+can produce annotated source or mixed source and assembly output.
+.SH OPARCHIVE
+.B oparchive
+produces an oprofile archive for offline analysis.
+.SH OPGPROF
+.B opgprof
+can produce a gprof-format profile for a single binary.
+
+.SH PROFILE SPECIFICATIONS
+All of the post-profiling tools can take profile specifications,
+which is some combination of the following parameters. Enclosing
+part of a profile specification in curly braces { } can be used
+for differential
+profiles with
+.BR opreport .
+
+.TP
+.BI "session:"sessionlist
+A comma-separated list of session names to resolve in. Absence of this
+tag, unlike all others, means "the current session", equivalent to
+specifying "session:current".
+.br
+.TP
+.BI "session-exclude:"sessionlist
+A comma-separated list of sessions to exclude.
+.br
+.TP
+.BI "image:"imagelist
+A comma-separated list of image names to resolve. Each entry may be a relative
+path, a glob-style name, or a full path, e.g.
+opreport 'image:/usr/bin/oprofiled,*op*,./oprofpp'
+.br
+.TP
+.BI "image-exclude:"imagelist
+Same as image:, but the matching images are excluded.
+.br
+.TP
+.BI "lib-image:"imagelist
+Same as image:, but only for images that are for
+a particular primary binary image (namely, an application). This only
+makes sense to use if you're using --separate.
+This includes kernel modules and the kernel when using
+--separate=kernel.
+.br
+.TP
+.BI "lib-image-exclude:"imagelist
+Same as lib-image:, but the matching images
+are excluded.
+.br
+.TP
+.BI "event:"eventname
+The symbolic event name to match on, e.g. event:DATA_MEM_REFS.
+.br
+.TP
+.BI "count:"eventcount
+The event count to match on, e.g. event:DATA_MEM_REFS count:30000.
+.br
+.TP
+.BI "unit-mask:"maskvalue
+The unit mask value of the event to match on, e.g. unit-mask:1.
+.br
+.TP
+.BI "cpu:"cpulist
+Only consider profiles for the given numbered CPU (starting from zero).
+This is only useful when using CPU profile separation.
+.br
+.TP
+.BI "tgid:"pidlist
+Only consider profiles for the given task groups. Unless some program is
+using threads, the task group ID of a process is the same as its process
+ID. This option corresponds to the POSIX notion of a thread group. This
+is only useful when using per-process profile separation.
+.br
+.TP
+.BI "tid:"tidlist
+Only consider profiles for the given threads. When using recent thread
+libraries, all threads in a process share the same task group ID, but
+have different thread IDs. You can use this option in combination with
+tgid: to restrict the results to particular threads within a process.
+This is only useful when using per-process profile separation.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by oprofile.
+
+.SH FILES
+.TP
+.I $HOME/.oprofile/
+Configuration files
+.TP
+.I /root/.oprofile/daemonrc
+Configuration file for opcontrol
+.TP
+.I $prefix/share/oprofile/
+Event description files used by OProfile.
+.TP
+.I /var/lib/oprofile/oprofiled.log
+The user-space daemon logfile.
+.TP
+.I /var/lib/oprofile/opdev, /var/lib/oprofile/ophashmapdev, /var/lib/oprofile/opnotedev
+The device files for communication with the Linux 2.4 kernel module. 
+.TP
+.I /dev/oprofile
+The device filesystem for communication with the Linux 2.6 kernel module. 
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for oprofile-0.9.1.
+
+.SH SEE ALSO
+.BR /usr/local/oprofile/share/doc/oprofile/,
+.BR opcontrol(1),
+.BR opreport(1),
+.BR opannotate(1),
+.BR oparchive(1),
+.BR opgprof(1),
+.BR gprof(1),
+.BR readprofile(1),
+CPU vendor architecture manuals
+
+.SH COPYRIGHT
+oprofile is Copyright (C) 1998-2004 University of Manchester, UK, John Levon,
+and others.
+OProfile is released under the GNU General Public License, Version 2,
+or (at your option) any later version.
+.SH AUTHORS
+John Levon <levon@movementarian.org> is the primary author. See the documentation
+for other contributors.
diff --git a/doc/oprofile.1.in b/doc/oprofile.1.in
new file mode 100644
index 0000000..bce94a9
--- /dev/null
+++ b/doc/oprofile.1.in
@@ -0,0 +1,186 @@
+.TH OPROFILE 1 "@DATE@" "oprofile @VERSION@"
+.UC 4
+.SH NAME
+oprofile \- a system-wide profiler
+.SH SYNOPSIS
+.br
+.B opcontrol
+[
+.I options
+]
+.br
+.B opreport
+[
+.I options
+]
+[ profile specification ]
+.br
+.B opannotate
+[
+.I options
+]
+[ profile specification ]
+.br
+.B oparchive
+[
+.I options
+]
+[ profile specification ]
+.br
+.B opgprof
+[
+.I options
+]
+[ profile specification ]
+.br
+.SH DESCRIPTION
+OProfile is a profiling system for systems running Linux
+2.2, 2.4, and 2.6. Profiling runs transparently in the background and profile
+data can be collected at any time. OProfile makes use of the hardware
+performance counters provided on Intel, AMD, and other processors,
+and uses a timer-interrupt based mechanism on CPUs without counters.
+OProfile can profile the whole system in high detail.
+.br
+For a gentle guide to using OProfile, please read the HTML documentation
+listed in SEE ALSO.
+.br
+.SH OPCONTROL
+.B opcontrol
+is used for starting and stopping the OProfile daemon, and providing set-up
+parameters.
+.SH OPREPORT
+.B opreport
+gives image and symbol-based profile summaries for the whole system or
+a subset of binary images.
+.SH OPANNOTATE
+.B opannotate
+can produce annotated source or mixed source and assembly output.
+.SH OPARCHIVE
+.B oparchive
+produces an oprofile archive for offline analysis.
+.SH OPGPROF
+.B opgprof
+can produce a gprof-format profile for a single binary.
+
+.SH PROFILE SPECIFICATIONS
+All of the post-profiling tools can take a profile specification,
+which is some combination of the following parameters. Enclosing
+part of a profile specification in curly braces { } can be used
+for differential profiles with
+.BR opreport .
+.br
+
+.TP
+.BI "session:"sessionlist
+A comma-separated list of session names to resolve in. Absence of this
+tag, unlike all others, means "the current session", equivalent to
+specifying "session:current".
+.br
+.TP
+.BI "session-exclude:"sessionlist
+A comma-separated list of sessions to exclude.
+.br
+.TP
+.BI "image:"imagelist
+A comma-separated list of image names to resolve. Each entry may be a relative
+path, a glob-style name, or a full path, e.g.
+opreport 'image:/usr/bin/oprofiled,*op*,./oprofpp'
+.br
+.TP
+.BI "image-exclude:"imagelist
+Same as image:, but the matching images are excluded.
+.br
+.TP
+.BI "lib-image:"imagelist
+Same as image:, but only for images that are for
+a particular primary binary image (namely, an application). This only
+makes sense to use if you're using --separate.
+This includes kernel modules and the kernel when using
+--separate=kernel.
+.br
+.TP
+.BI "lib-image-exclude:"imagelist
+Same as lib-image:, but the matching images
+are excluded.
+.br
+.TP
+.BI "event:"eventname
+The symbolic event name to match on, e.g. event:DATA_MEM_REFS.
+.br
+.TP
+.BI "count:"eventcount
+The event count to match on, e.g. event:DATA_MEM_REFS count:30000.
+.br
+.TP
+.BI "unit-mask:"maskvalue
+The unit mask value of the event to match on, e.g. unit-mask:1.
+.br
+.TP
+.BI "cpu:"cpulist
+Only consider profiles for the given numbered CPU (starting from zero).
+This is only useful when using CPU profile separation.
+.br
+.TP
+.BI "tgid:"pidlist
+Only consider profiles for the given task groups. Unless some program is
+using threads, the task group ID of a process is the same as its process
+ID. This option corresponds to the POSIX notion of a thread group. This
+is only useful when using per-process profile separation.
+.br
+.TP
+.BI "tid:"tidlist
+Only consider profiles for the given threads. When using recent thread
+libraries, all threads in a process share the same task group ID, but
+have different thread IDs. You can use this option in combination with
+tgid: to restrict the results to particular threads within a process.
+This is only useful when using per-process profile separation.
+
+.SH ENVIRONMENT
+No special environment variables are recognised by oprofile.
+
+.SH FILES
+.TP
+.I $HOME/.oprofile/
+Configuration files
+.TP
+.I /root/.oprofile/daemonrc
+Configuration file for opcontrol
+.TP
+.I $prefix/share/oprofile/
+Event description files used by OProfile.
+.TP
+.I /var/lib/oprofile/oprofiled.log
+The user-space daemon logfile.
+.TP
+.I /var/lib/oprofile/opdev, /var/lib/oprofile/ophashmapdev, /var/lib/oprofile/opnotedev
+The device files for communication with the Linux 2.4 kernel module. 
+.TP
+.I /dev/oprofile
+The device filesystem for communication with the Linux 2.6 kernel module. 
+.TP
+.I /var/lib/oprofile/samples/
+The location of the generated sample files.
+
+.SH VERSION
+.TP
+This man page is current for @PACKAGE@-@VERSION@.
+
+.SH SEE ALSO
+.BR @OP_DOCDIR@,
+.BR opcontrol(1),
+.BR opreport(1),
+.BR opannotate(1),
+.BR oparchive(1),
+.BR opgprof(1),
+.BR gprof(1),
+.BR readprofile(1),
+.BR "CPU vendor architecture manuals"
+
+.SH COPYRIGHT
+oprofile is Copyright (C) 1998-2004 University of Manchester, UK, John Levon,
+and others.
+OProfile is released under the GNU General Public License, Version 2,
+or (at your option) any later version.
+.SH AUTHORS
+John Levon <levon@movementarian.org> is the primary author. See the documentation
+for other contributors.
diff --git a/doc/oprofile.html b/doc/oprofile.html
new file mode 100644
index 0000000..57cfd7e
--- /dev/null
+++ b/doc/oprofile.html
@@ -0,0 +1,4392 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+    <title>OProfile manual</title>
+    <meta name="generator" content="DocBook XSL Stylesheets V1.68.1" />
+  </head>
+  <body>
+    <div class="book" lang="en" xml:lang="en">
+      <div class="titlepage">
+        <div>
+          <div>
+            <h1 class="title"><a id="oprofile-guide"></a>OProfile manual</h1>
+          </div>
+          <div>
+            <div class="authorgroup">
+              <div class="author">
+                <h3 class="author"><span class="firstname">John</span> <span class="surname">Levon</span></h3>
+                <div class="affiliation">
+                  <div class="address">
+                    <p>
+                      <code class="email">&lt;<a href="mailto:levon@movementarian.org">levon@movementarian.org</a>&gt;</code>
+                    </p>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+          <div>
+            <p class="copyright">Copyright © 2000-2004 Victoria University of Manchester, John Levon and others</p>
+          </div>
+        </div>
+        <hr />
+      </div>
+      <div class="toc">
+        <p>
+          <b>Table of Contents</b>
+        </p>
+        <dl>
+          <dt>
+            <span class="chapter">
+              <a href="#introduction">1. Introduction</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#applications">1. Applications of OProfile</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#requirements">2. System requirements</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#resources">3. Internet resources</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#install">4. Installation</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#uninstall">5. Uninstalling OProfile</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#overview">2. Overview</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#getting-started">1. Getting started</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#tools-overview">2. Tools summary</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#controlling">3. Controlling the profiler</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#controlling-daemon">1. Using <span><strong class="command">opcontrol</strong></span></a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opcontrolexamples">1.1. Examples</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#eventspec">1.2. Specifying performance counter events</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#oprofile-gui">2. Using <span><strong class="command">oprof_start</strong></span></a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#detailed-parameters">3. Configuration details</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#hardware-counters">3.1. Hardware performance counters</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#rtc">3.2. OProfile in RTC mode</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#timer">3.3. OProfile in timer interrupt mode</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#p4">3.4. Pentium 4 support</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#ia64">3.5. Intel Itanium 2 support</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#ppc64">3.6. PowerPC64 support</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#misuse">3.7. Dangerous counter settings</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#results">4. Obtaining results</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#profile-spec">1. Profile specifications</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#profile-spec-examples">1.1. Examples</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#profile-spec-details">1.2. Profile specification parameters</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#id2682936">1.3. Locating and managing binary images</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#no-results">1.4. What to do when you don't get any results</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#opreport">2. Image summaries and symbol summaries (<span><strong class="command">opreport</strong></span>)</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-merging">2.1. Merging separate profiles</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-comparison">2.2. Side-by-side multiple results</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-callgraph">2.3. Callgraph output</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-diff">2.4. Differential profiles with <span><strong class="command">opreport</strong></span></a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-anon">2.5. Anonymous executable mappings</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opreport-options">2.6. Options for <span><strong class="command">opreport</strong></span></a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#opannotate">3. Outputting annotated source (<span><strong class="command">opannotate</strong></span>)</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opannotate-finding-source">3.1. Locating source files</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opannotate-details">3.2. Usage of <span><strong class="command">opannotate</strong></span></a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#opgprof">4. <span><strong class="command">gprof</strong></span>-compatible output (<span><strong class="command">opgprof</strong></span>)</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#opgprof-details">4.1. Usage of <span><strong class="command">opgprof</strong></span></a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#oparchive">5. Archiving measurements (<span><strong class="command">oparchive</strong></span>)</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#oparchive-details">5.1. Usage of <span><strong class="command">oparchive</strong></span></a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#interpreting">5. Interpreting profiling results</a>
+            </span>
+          </dt>
+          <dd>
+            <dl>
+              <dt>
+                <span class="sect1">
+                  <a href="#irq-latency">1. Profiling interrupt latency</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#kernel-profiling">2. Kernel profiling</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#irq-masking">2.1. Interrupt masking</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#idle">2.2. Idle time</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#kernel-modules">2.3. Profiling kernel modules</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#interpreting-callgraph">3. Interpreting call-graph profiles</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#debug-info">4. Inaccuracies in annotated source</a>
+                </span>
+              </dt>
+              <dd>
+                <dl>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#effect-of-optimizations">4.1. Side effects of optimizations</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#prologues">4.2. Prologues and epilogues</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#inlined-function">4.3. Inlined functions</a>
+                    </span>
+                  </dt>
+                  <dt>
+                    <span class="sect2">
+                      <a href="#wrong-linenr-info">4.4. Inaccuracy in line number information</a>
+                    </span>
+                  </dt>
+                </dl>
+              </dd>
+              <dt>
+                <span class="sect1">
+                  <a href="#symbol-without-debug-info">5. Assembly functions</a>
+                </span>
+              </dt>
+              <dt>
+                <span class="sect1">
+                  <a href="#hidden-cost">6. Other discrepancies</a>
+                </span>
+              </dt>
+            </dl>
+          </dd>
+          <dt>
+            <span class="chapter">
+              <a href="#ack">6. Acknowledgments</a>
+            </span>
+          </dt>
+        </dl>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="introduction"></a>Chapter 1. Introduction</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#applications">1. Applications of OProfile</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#requirements">2. System requirements</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#resources">3. Internet resources</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#install">4. Installation</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#uninstall">5. Uninstalling OProfile</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <p>
+This manual applies to OProfile version 0.9.1cvs.
+OProfile is a profiling system for Linux 2.2/2.4/2.6 systems on a number of architectures. It is capable of profiling
+all parts of a running system, from the kernel (including modules and interrupt handlers) to shared libraries
+to binaries. It runs transparently in the background collecting information at a low overhead. These
+features make it ideal for profiling entire systems to determine bottlenecks in real-world systems.
+</p>
+        <p>
+Many CPUs provide "performance counters", hardware registers that can count "events"; for example,
+cache misses, or CPU cycles. OProfile provides profiles of code based on the number of these occurring events:
+repeatedly, every time a certain (configurable) number of events has occurred, the PC value is recorded.
+This information is aggregated into profiles for each binary image.</p>
+        <p>
+Some hardware setups do not allow OProfile to use performance counters: in these cases, no
+events are available, and OProfile operates in timer/RTC mode, as described in later chapters.
+</p>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="applications"></a>1. Applications of OProfile</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+OProfile is useful in a number of situations. You might want to use OProfile when you :
+</p>
+          <div class="itemizedlist">
+            <ul type="disc">
+              <li>
+                <p>need low overhead</p>
+              </li>
+              <li>
+                <p>cannot use highly intrusive profiling methods</p>
+              </li>
+              <li>
+                <p>need to profile interrupt handlers</p>
+              </li>
+              <li>
+                <p>need to profile an application and its shared libraries</p>
+              </li>
+              <li>
+                <p>need to capture the performance behaviour of an entire system</p>
+              </li>
+              <li>
+                <p>want to examine hardware effects such as cache misses</p>
+              </li>
+              <li>
+                <p>want detailed source annotation</p>
+              </li>
+              <li>
+                <p>want instruction-level profiles</p>
+              </li>
+              <li>
+                <p>want call-graph profiles</p>
+              </li>
+            </ul>
+          </div>
+          <p>
+OProfile is not a panacea. OProfile might not be a complete solution when you :
+</p>
+          <div class="itemizedlist">
+            <ul type="disc">
+              <li>
+                <p>require call graph profiles on platforms other than 2.6/x86</p>
+              </li>
+              <li>
+                <p>don't have root permissions</p>
+              </li>
+              <li>
+                <p>require 100% instruction-accurate profiles</p>
+              </li>
+              <li>
+                <p>need function call counts or an interstitial profiling API</p>
+              </li>
+              <li>
+                <p>cannot tolerate any disturbance to the system whatsoever</p>
+              </li>
+              <li>
+                <p>need to profile interpreted or dynamically compiled code such as Java or Python</p>
+              </li>
+            </ul>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="requirements"></a>2. System requirements</h2>
+              </div>
+            </div>
+          </div>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">Linux kernel 2.2/2.4/2.6</span>
+              </dt>
+              <dd>
+                <p>
+			OProfile uses a kernel module that can be compiled for
+			2.2.11 or later and 2.4. Versions 2.4.10 or above are recommended, and required if you use the
+			boot-time kernel option <code class="option">nosmp</code>.  2.6 kernels are supported with the in-kernel
+			OProfile driver. Note that only 32-bit x86 and IA64 are supported on 2.2/2.4 kernels.
+
+			</p>
+                <p>
+			PPC64 processors (Power4/Power5/PPC970) require a recent (&gt; 2.6.5) kernel with the line 
+			<code class="constant">#define PV_970</code> present in <code class="filename">include/asm-ppc64/processor.h</code>.
+
+		</p>
+              </dd>
+              <dt>
+                <span class="term">modutils 2.4.6 or above</span>
+              </dt>
+              <dd>
+                <p>
+			You should have installed modutils 2.4.6 or higher (in fact earlier versions work well in almost all
+			cases).
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Supported architecture</span>
+              </dt>
+              <dd>
+                <p>
+			For Intel IA32, a CPU with either a P6 generation or Pentium 4 core is
+			required. In marketing terms this translates to anything
+			between an Intel Pentium Pro (not Pentium Classics) and
+			a Pentium 4 / Xeon, including all Celerons.  The AMD
+			Athlon and Duron CPUs are also supported.  Other IA32
+			CPU types only support the RTC mode of OProfile; please
+			see later in this manual for details.  Hyper-threaded Pentium IVs
+			are not supported in 2.4. For 2.4 kernels, the Intel
+			IA-64 CPUs are also supported. For 2.6 kernels, there is additionally
+			support for Alpha processors, MIPS, ARM, x86-64, sparc64, ppc64, and,
+			in timer mode, PA-RISC and s390.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Uniprocessor or SMP</span>
+              </dt>
+              <dd>
+                <p>
+			SMP machines are fully supported.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Required libraries</span>
+              </dt>
+              <dd>
+                <p>
+			These libraries are required : <code class="filename">popt</code>, <code class="filename">bfd</code>,
+			<code class="filename">liberty</code> (debian users: libiberty is provided in binutils-dev package), <code class="filename">dl</code>,
+			plus the standard C++ libraries.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Bash version 2</span>
+              </dt>
+              <dd>
+                <p>
+			The <span><strong class="command">opcontrol</strong></span> script requires bash version 2 at least to be installed
+			as <code class="filename">/bin/bash</code> or <code class="filename">/bin/bash2</code>
+		</p>
+              </dd>
+              <dt>
+                <span class="term">OProfile GUI</span>
+              </dt>
+              <dd>
+                <p>
+			The use of the GUI to start the profiler requires the <code class="filename">Qt 2</code> library. <code class="filename">Qt 3</code> should
+			also work.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <span class="acronym">ELF</span>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Probably not too strenuous a requirement, but older <span class="acronym">A.OUT</span> binaries/libraries are not supported.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">K&amp;R coding style</span>
+              </dt>
+              <dd>
+                <p>
+			OK, so it's not really a requirement, but I wish it was...
+		</p>
+              </dd>
+            </dl>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="resources"></a>3. Internet resources</h2>
+              </div>
+            </div>
+          </div>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">Web page</span>
+              </dt>
+              <dd>
+                <p>
+			There is a web page (which you may be reading now) at
+			<a href="http://oprofile.sf.net/">http://oprofile.sf.net/</a>.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Download</span>
+              </dt>
+              <dd>
+                <p>
+			You can download a source tarball or get anonymous CVS at the sourceforge page,
+			<a href="http://sf.net/projects/oprofile/">http://sf.net/projects/oprofile/</a>.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Mailing list</span>
+              </dt>
+              <dd>
+                <p>
+			There is a low-traffic OProfile-specific mailing list, details at
+			<a href="http://sf.net/mail/?group_id=16191">http://sf.net/mail/?group_id=16191</a>.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">Bug tracker</span>
+              </dt>
+              <dd>
+                <p>
+			There is a bug tracker for OProfile at SourceForge,
+			<a href="http://sf.net/tracker/?group_id=16191&amp;atid=116191">http://sf.net/tracker/?group_id=16191&amp;atid=116191</a>.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">IRC channel</span>
+              </dt>
+              <dd>
+                <p>
+			Several OProfile developers and users sometimes hang out on channel <span><strong class="command">#oprofile</strong></span>
+			on the <a href="http://freenode.info">freenode</a> network. 
+		</p>
+              </dd>
+            </dl>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="install"></a>4. Installation</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+First you need to build OProfile and install it. <span><strong class="command">./configure</strong></span>, <span><strong class="command">make</strong></span>, <span><strong class="command">make install</strong></span>
+is often all you need, but note these arguments to <span><strong class="command">./configure</strong></span> :
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">
+                  <code class="option">--with-linux</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Use this option to specify the location of the kernel source tree you wish
+			to compile against. The kernel module is built against this source and
+			will only work with a running kernel built from the same source with
+			the exact same options, so it is important you specify this option if you need
+			to.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--with-kernel-support</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Use this option with 2.6 and above kernels to indicate the 
+	    		kernel provides the OProfile device driver.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--with-qt-dir/includes/libraries</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Specify the location of Qt headers and libraries. It defaults to searching in
+			<code class="constant">$QTDIR</code> if these are not specified.
+		</p>
+              </dd>
+              <dt>
+                <a id="enable-abi"></a>
+                <span class="term">
+                  <code class="option">--enable-abi</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Activate code within the OProfile sample collection daemon
+			<span><strong class="command">oprofiled</strong></span> which records information about the binary
+			format of sample files in <code class="filename">/var/lib/oprofile/abi</code>, to
+			permit their transport between hosts using the
+			<span><strong class="command">opimport</strong></span> utility. See <a href="#opimport"><code class="filename">opimport</code></a>. This option is primarily intended for embedded
+			systems or remote analysis of production machines; if you will be
+			performing all sample analysis on the same machine as you are profiling,
+			it is safe to omit this option.
+		</p>
+              </dd>
+              <dt>
+                <a id="disable-werror"></a>
+                <span class="term">
+                  <code class="option">--disable-werror</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Development versions of OProfile build by
+			default with <code class="option">-Werror</code>. This option turns
+			<code class="option">-Werror</code> off.
+		</p>
+              </dd>
+              <dt>
+                <a id="disable-optimization"></a>
+                <span class="term">
+                  <code class="option">--disable-optimization</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+			Disable the <code class="option">-O2</code> compiler flag
+			(useful if you discover an OProfile bug and want to give a useful
+			back-trace etc.)
+		</p>
+              </dd>
+            </dl>
+          </div>
+          <p>
+You'll need to have a configured kernel source for the current kernel
+to build the module for 2.4 kernels.  Since all distributions provide different kernels, it's unlikely that the running kernel matches the configured source
+you installed. The safest way is to recompile your own kernel, run it and compile oprofile. It is also recommended that if you have a
+uniprocessor machine, you enable the local APIC / IO_APIC support for
+your kernel (this is automatically enabled for SMP kernels). With many BIOSes, a kernel &gt;= 2.6.9 and a UP kernel, it's not sufficient to enable the local APIC: you must also turn it on explicitly at boot time by providing the "lapic" option to the kernel. On
+machines with power management, such as laptops, the power management
+must be turned off when using OProfile with 2.4 kernels. The power management software
+in the BIOS cannot handle the non-maskable interrupts (NMIs) used by
+OProfile for data collection. If you use the NMI watchdog, be aware that
+the watchdog is disabled when profiling starts, and not re-enabled until the
+OProfile module is removed (or, in 2.6, when OProfile is not running). If you compile OProfile for
+a 2.2 kernel you must be root to compile the module. If you are using
+2.6 kernels or higher, you do not need kernel source, as long as the
+OProfile driver is enabled; additionally, you should not need to disable
+power management.
+</p>
+          <p>
+Please note that you must save or have available the <code class="filename">vmlinux</code> file
+generated during a kernel compile, as OProfile needs it (you can use
+<code class="option">--no-vmlinux</code>, but this will prevent kernel profiling).
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="uninstall"></a>5. Uninstalling OProfile</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+You must have the source tree available to uninstall OProfile; a <span><strong class="command">make uninstall</strong></span> will
+remove all installed files except your configuration file in the directory <code class="filename">~/.oprofile</code>.
+</p>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="overview"></a>Chapter 2. Overview</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#getting-started">1. Getting started</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#tools-overview">2. Tools summary</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="getting-started"></a>1. Getting started</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Before you can use OProfile, you must set it up. The minimum setup required for this
+is to tell OProfile where the <code class="filename">vmlinux</code> file corresponding to the
+running kernel is, for example :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opcontrol --vmlinux=/boot/vmlinux-`uname -r`</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+If you don't want to profile the kernel itself,
+you can tell OProfile you don't have a <code class="filename">vmlinux</code> file :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opcontrol --no-vmlinux</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Now we are ready to start the daemon (<span><strong class="command">oprofiled</strong></span>) which collects
+the profile data :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opcontrol --start</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+When you want to stop profiling, you can do so with :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opcontrol --shutdown</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Note that unlike <span><strong class="command">gprof</strong></span>, no instrumentation (<code class="option">-pg</code>
+and <code class="option">-a</code> options to <span><strong class="command">gcc</strong></span>)
+is necessary.
+</p>
+          <p>
+Periodically (or on <span><strong class="command">opcontrol --shutdown</strong></span> or <span><strong class="command">opcontrol --dump</strong></span>)
+the profile data is written out into the <code class="filename">/var/lib/oprofile/samples</code> directory.
+These profile files cover shared libraries, applications, the kernel (vmlinux), and kernel modules.
+You can clear the profile data (at any time) with <span><strong class="command">opcontrol --reset</strong></span>.
+</p>
+          <p>
+You can get summaries of this data in a number of ways at any time. To get a summary of
+data across the entire system for all of these profiles, you can do :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opreport</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Or to get a more detailed summary, for a particular image, you can do something like :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">opreport -l /boot/vmlinux-`uname -r`</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+There are also a number of other ways of presenting the data, as described later in this manual.
+Note that OProfile will choose a default profiling setup for you. However, there are a number
+of options you can pass to <span><strong class="command">opcontrol</strong></span> if you need to change something,
+also detailed later.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="tools-overview"></a>2. Tools summary</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+This section gives a brief description of the available OProfile utilities and their purpose.
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">
+                  <code class="filename">ophelp</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility lists the available events and short descriptions.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opcontrol</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Used for controlling the OProfile data collection, discussed in <a href="#controlling" title="Chapter 3. Controlling the profiler">Chapter 3, <i>Controlling the profiler</i></a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opreport</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This is the main tool for retrieving useful profile data, described in
+		<a href="#opreport" title="2. Image summaries and symbol summaries (opreport)">Section 2, &#8220;Image summaries and symbol summaries (<span><strong class="command">opreport</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opannotate</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility can be used to produce annotated source, assembly or mixed source/assembly.
+		Source level annotation is available only if the application was compiled with 
+		debugging symbols. See <a href="#opannotate" title="3. Outputting annotated source (opannotate)">Section 3, &#8220;Outputting annotated source (<span><strong class="command">opannotate</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">opgprof</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility can output gprof-style data files for a binary, for use with
+		<span><strong class="command">gprof -p</strong></span>. See <a href="#opgprof" title="4. gprof-compatible output (opgprof)">Section 4, &#8220;<span><strong class="command">gprof</strong></span>-compatible output (<span><strong class="command">opgprof</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="filename">oparchive</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility can be used to collect executables, debuginfo,
+		and sample files and copy the files into an archive.
+		The archive is self-contained and can be moved to another
+		machine for further analysis.
+		See <a href="#oparchive" title="5. Archiving measurements (oparchive)">Section 5, &#8220;Archiving measurements (<span><strong class="command">oparchive</strong></span>)&#8221;</a>.
+	</p>
+              </dd>
+              <dt>
+                <a id="opimport"></a>
+                <span class="term">
+                  <code class="filename">opimport</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		This utility converts sample database files from a foreign binary format (abi) to
+		the native format. This is useful only when moving sample files between hosts,
+		for analysis on platforms other than the one used for collection. The abi format
+		of the file to be imported is described in a text file located in
+		<code class="filename">/var/lib/oprofile/abi</code>, if the <code class="option">--enable-abi</code>
+		configure-time option was enabled. Furthermore, the <span><strong class="command">opimport</strong></span>
+		tool is not built unless <code class="option">--enable-abi</code> is given. See <a href="#enable-abi"><code class="option">--enable-abi</code></a>.
+	</p>
+              </dd>
+            </dl>
+          </div>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="controlling"></a>Chapter 3. Controlling the profiler</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#controlling-daemon">1. Using <span><strong class="command">opcontrol</strong></span></a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opcontrolexamples">1.1. Examples</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#eventspec">1.2. Specifying performance counter events</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#oprofile-gui">2. Using <span><strong class="command">oprof_start</strong></span></a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#detailed-parameters">3. Configuration details</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#hardware-counters">3.1. Hardware performance counters</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#rtc">3.2. OProfile in RTC mode</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#timer">3.3. OProfile in timer interrupt mode</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#p4">3.4. Pentium 4 support</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#ia64">3.5. Intel Itanium 2 support</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#ppc64">3.6. PowerPC64 support</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#misuse">3.7. Dangerous counter settings</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+          </dl>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="controlling-daemon"></a>1. Using <span><strong class="command">opcontrol</strong></span></h2>
+              </div>
+            </div>
+          </div>
+          <p>
+In this section we describe the configuration and control of the profiling system
+with opcontrol in more depth.
+The <span><strong class="command">opcontrol</strong></span> script has a default setup, but you
+can alter this with the options given below. In particular,
+if your hardware supports performance counters, you can configure them.
+There are a number of counters (for example, counter 0 and counter 1
+on the Pentium III). Each of these counters can be programmed with
+an event to count, such as cache misses or MMX operations. The event
+chosen for each counter is reflected in the profile data collected
+by OProfile: functions and binaries at the top of the profiles reflect
+that most of the chosen events happened within that code.
+</p>
+          <p>
+Additionally, each counter has a "count" value: this corresponds to how
+detailed the profile is. The lower the value, the more frequently profile
+samples are taken. A counter can choose to sample only kernel code, user-space code,
+or both (both is the default). Finally, some events have a "unit mask"
+- this is a value that further restricts the types of event that are counted. 
+The event types and unit masks for your CPU are listed by <span><strong class="command">opcontrol
+--list-events</strong></span>.
+</p>
+          <p>
+The <span><strong class="command">opcontrol</strong></span> script provides the following actions :
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term">
+                  <code class="option">--init</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Loads the OProfile module if required and makes the OProfile driver
+		interface available.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--setup</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Followed by a list of arguments for profiling setup. The list of arguments
+		    is saved in <code class="filename">/root/.oprofile/daemonrc</code>.
+		    Giving this option is not necessary; you can just directly pass one
+		    of the setup options, e.g. <span><strong class="command">opcontrol --no-vmlinux</strong></span>.
+		  </p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--status</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Show configuration information.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--start-daemon</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Start the oprofile daemon without starting actual profiling. The profiling
+		can then be started using <code class="option">--start</code>. This is useful for avoiding
+		measuring the cost of daemon startup, as <code class="option">--start</code> is a simple
+		write to a file in oprofilefs. Not available in 2.2/2.4 kernels.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--start</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Start data collection with either arguments provided by <code class="option">--setup</code>
+		or information saved in <code class="filename">/root/.oprofile/daemonrc</code>. Specifying
+		the additional option <code class="option">--verbose</code> makes the daemon generate lots of debug data
+		whilst it is running.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--dump</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Force a flush of the collected profiling data to the daemon.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--stop</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Stop data collection (this separate step is not possible with 2.2 or 2.4 kernels).
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--shutdown</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Stop data collection and kill the daemon.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--reset</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Clears out data from current session, but leaves saved sessions.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--save=</code>session_name</span>
+              </dt>
+              <dd>
+                <p>
+		    Save data from current session to session_name.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--deinit</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+                Shuts down the daemon and unloads the OProfile module and oprofilefs.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--list-events</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    List event types and unit masks.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--help</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		    Generate usage messages.
+		</p>
+              </dd>
+            </dl>
+          </div>
+          <p>
+There are a number of possible settings, of which, only
+<code class="option">--vmlinux</code> (or <code class="option">--no-vmlinux</code>)
+is required. These settings are stored in <code class="filename">~/.oprofile/daemonrc</code>.
+</p>
+          <div class="variablelist">
+            <dl>
+              <dt>
+                <span class="term"><code class="option">--buffer-size=</code>num</span>
+              </dt>
+              <dd>
+                <p>
+		Number of samples in kernel buffer.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--cpu-buffer-size=</code>num</span>
+              </dt>
+              <dd>
+                <p>
+		Number of samples in kernel per-cpu buffer (2.6 only). If you
+		profile at a high rate, it can help to increase this if the log
+		file shows an excessive count of samples lost due to cpu buffer overflow.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--event=</code>[eventspec]</span>
+              </dt>
+              <dd>
+                <p>
+		Use the given performance counter event to profile.
+		See <a href="#eventspec" title="1.2. Specifying performance counter events">Section 1.2, &#8220;Specifying performance counter events&#8221;</a> below.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--separate=</code>[none,lib,kernel,thread,cpu,all]</span>
+              </dt>
+              <dd>
+                <p>
+		By default, every profile is stored in a single file. Thus, for example,
+		samples in the C library are all accredited to the <code class="filename">/lib/libc.o</code>
+		profile. However, you can choose to create separate sample files by specifying
+		one of the below options.
+		</p>
+                <div class="informaltable">
+                  <table border="1">
+                    <colgroup>
+                      <col />
+                      <col />
+                    </colgroup>
+                    <tbody>
+                      <tr>
+                        <td>
+                          <code class="option">none</code>
+                        </td>
+                        <td>No profile separation (default)</td>
+                      </tr>
+                      <tr>
+                        <td>
+                          <code class="option">lib</code>
+                        </td>
+                        <td>Create per-application profiles for libraries</td>
+                      </tr>
+                      <tr>
+                        <td>
+                          <code class="option">kernel</code>
+                        </td>
+                        <td>Create per-application profiles for the kernel and kernel modules</td>
+                      </tr>
+                      <tr>
+                        <td>
+                          <code class="option">thread</code>
+                        </td>
+                        <td>Create profiles for each thread and each task</td>
+                      </tr>
+                      <tr>
+                        <td>
+                          <code class="option">cpu</code>
+                        </td>
+                        <td>Create profiles for each CPU</td>
+                      </tr>
+                      <tr>
+                        <td>
+                          <code class="option">all</code>
+                        </td>
+                        <td>All of the above options</td>
+                      </tr>
+                    </tbody>
+                  </table>
+                </div>
+                <p>
+		Note  that <code class="option">--separate=kernel</code> also turns on <code class="option">--separate=lib</code>.
+		
+		When using <code class="option">--separate=kernel</code>, samples in hardware interrupts, soft-irqs, or other
+		asynchronous kernel contexts are credited to the task currently running. This means you will see
+		seemingly nonsense profiles such as <code class="filename">/bin/bash</code> showing samples for the PPP modules,
+		etc.
+		</p>
+                <p>
+		On 2.2/2.4 only kernel threads already started when profiling begins are correctly profiled;
+		newly started kernel thread samples are credited to the vmlinux (kernel) profile.
+		</p>
+                <p>
+		Using <code class="option">--separate=thread</code> creates a lot
+		of sample files if you leave OProfile running for a while; it's most
+		useful when used for short sessions, or when using image filtering.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--callgraph=</code>#depth</span>
+              </dt>
+              <dd>
+                <p>
+		Enable call-graph sample collection with a maximum depth. Use 0 to disable
+		callgraph profiling. Currently this requires a recent
+		2.6 kernel, and x86.
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--image=</code>image,[images]|"all"</span>
+              </dt>
+              <dd>
+                <p>
+		Image filtering. If you specify one or more absolute
+		paths to binaries, OProfile will only produce profile results for those
+		binary images. This is useful for restricting the sometimes voluminous
+		output you may get otherwise, especially with
+		<code class="option">--separate=thread</code>. Note that if you are using
+		<code class="option">--separate=lib</code> or
+		<code class="option">--separate=kernel</code>, then if you specify an
+		application binary, the shared libraries and kernel code
+		<span class="emphasis"><em>are</em></span> included. Specify the value
+		"all" to profile everything (the default).
+		</p>
+              </dd>
+              <dt>
+                <span class="term"><code class="option">--vmlinux=</code>file</span>
+              </dt>
+              <dd>
+                <p>
+		vmlinux kernel image.
+		</p>
+              </dd>
+              <dt>
+                <span class="term">
+                  <code class="option">--no-vmlinux</code>
+                </span>
+              </dt>
+              <dd>
+                <p>
+		Use this when you don't have a kernel vmlinux file, and you don't want
+		to profile the kernel. This still counts the total number of kernel samples,
+		but can't give symbol-based results for the kernel or any modules.
+		</p>
+              </dd>
+            </dl>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opcontrolexamples"></a>1.1. Examples</h3>
+                </div>
+              </div>
+            </div>
+            <div class="sect3" lang="en" xml:lang="en">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="examplesperfctr"></a>1.1.1. Intel performance counter setup</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
+Here, we have a Pentium III running at 800MHz, and we want to look at where data memory
+references are happening most, and also get results for CPU time.
+</p>
+              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                <tr>
+                  <td>
+                    <pre class="screen">
+# opcontrol --event=CPU_CLK_UNHALTED:400000 --event=DATA_MEM_REFS:10000
+# opcontrol --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start
+</pre>
+                  </td>
+                </tr>
+              </table>
+            </div>
+            <div class="sect3" lang="en" xml:lang="en">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="examplesrtc"></a>1.1.2. RTC mode</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
+Here, we have an Intel laptop without support for performance counters, running on 2.4 kernels.
+</p>
+              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                <tr>
+                  <td>
+                    <pre class="screen">
+# ophelp -r
+CPU with RTC device
+# opcontrol --vmlinux=/boot/2.4.13/vmlinux --event=RTC_INTERRUPTS:1024
+# opcontrol --start
+</pre>
+                  </td>
+                </tr>
+              </table>
+            </div>
+            <div class="sect3" lang="en" xml:lang="en">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="examplesstartdaemon"></a>1.1.3. Starting the daemon separately</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
+If we're running 2.6 kernels, we can use <code class="option">--start-daemon</code> to avoid
+the profiler startup affecting results.
+</p>
+              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                <tr>
+                  <td>
+                    <pre class="screen">
+# opcontrol --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start-daemon
+# my_favourite_benchmark --init
+# opcontrol --start ; my_favourite_benchmark --run ; opcontrol --stop
+</pre>
+                  </td>
+                </tr>
+              </table>
+            </div>
+            <div class="sect3" lang="en" xml:lang="en">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="exampleseparate"></a>1.1.4. Separate profiles for libraries and the kernel</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
+Here, we want to see a profile of the OProfile daemon itself, including when
+it was running inside the kernel driver, and its use of shared libraries.
+</p>
+              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                <tr>
+                  <td>
+                    <pre class="screen">
+# opcontrol --separate=kernel --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start
+# my_favourite_stress_test --run
+# opreport -l -p /lib/modules/2.6.0/kernel /usr/local/bin/oprofiled
+</pre>
+                  </td>
+                </tr>
+              </table>
+            </div>
+            <div class="sect3" lang="en" xml:lang="en">
+              <div class="titlepage">
+                <div>
+                  <div>
+                    <h4 class="title"><a id="examplessessions"></a>1.1.5. Profiling sessions</h4>
+                  </div>
+                </div>
+              </div>
+              <p>
+It can often be useful to split up profiling data into several different
+time periods. For example, you may want to collect data on an application's
+startup separately from the normal runtime data. You can use the simple
+command <span><strong class="command">opcontrol --save</strong></span> to do this. For example :
+</p>
+              <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                <tr>
+                  <td>
+                    <pre class="screen">
+# opcontrol --save=blah
+</pre>
+                  </td>
+                </tr>
+              </table>
+              <p>
+will create a sub-directory in <code class="filename">/var/lib/oprofile/samples</code> containing the samples
+up to that point (the current session's sample files are moved into this
+directory). You can then pass this session name as a parameter to the post-profiling
+analysis tools, to only get data up to the point you named the
+session. If you do not want to save a session, you can do
+<span><strong class="command">rm -rf /var/lib/oprofile/samples/sessionname</strong></span> or, for the
+current session, <span><strong class="command">opcontrol --reset</strong></span>.
+</p>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="eventspec"></a>1.2. Specifying performance counter events</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The <code class="option">--event</code> option to <span><strong class="command">opcontrol</strong></span>
+takes a specification that indicates how the details of each
+hardware performance counter should be set up. If you want to
+revert to OProfile's default setting (<code class="option">--event</code>
+is strictly optional), use <code class="option">--event=default</code>.
+</p>
+            <p>
+You can pass multiple event specifications. OProfile will allocate
+hardware counters as necessary. Note that some combinations are not
+allowed by the CPU; running <span><strong class="command">opcontrol --list-events</strong></span> gives the details
+of each event. The event specification is a colon-separated string
+of the form <code class="option"><span class="emphasis"><em>name</em></span>:<span class="emphasis"><em>count</em></span>:<span class="emphasis"><em>unitmask</em></span>:<span class="emphasis"><em>kernel</em></span>:<span class="emphasis"><em>user</em></span></code> as described in this table:
+</p>
+            <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+              <h3 class="title">Note</h3>
+              <p>
+For the PowerPC platforms, all events specified must be in the same group; i.e., the group number
+appended to the event name (e.g. <code class="constant">&lt;<span class="emphasis"><em>some-event-name</em></span>&gt;_GRP9</code>) must be the same.
+</p>
+            </div>
+            <p>
+</p>
+            <div class="informaltable">
+              <table border="1">
+                <colgroup>
+                  <col />
+                  <col />
+                </colgroup>
+                <tbody>
+                  <tr>
+                    <td>
+                      <code class="option">name</code>
+                    </td>
+                    <td>The symbolic event name, e.g. <code class="constant">CPU_CLK_UNHALTED</code></td>
+                  </tr>
+                  <tr>
+                    <td>
+                      <code class="option">count</code>
+                    </td>
+                    <td>The counter reset value, e.g. 100000</td>
+                  </tr>
+                  <tr>
+                    <td>
+                      <code class="option">unitmask</code>
+                    </td>
+                    <td>The unit mask, as given in the events list, e.g. 0x0f</td>
+                  </tr>
+                  <tr>
+                    <td>
+                      <code class="option">kernel</code>
+                    </td>
+                    <td>Whether to profile kernel code</td>
+                  </tr>
+                  <tr>
+                    <td>
+                      <code class="option">user</code>
+                    </td>
+                    <td>Whether to profile userspace code</td>
+                  </tr>
+                </tbody>
+              </table>
+            </div>
+            <p>
+The last three values are optional; if you omit them (e.g. <code class="option">--event=DATA_MEM_REFS:30000</code>),
+they will be set to the default values (a unit mask of 0, and profiling both kernel and
+userspace code). Note that some events require a unit mask.
+</p>
+            <p>
+If OProfile is using RTC mode, and you want to alter the default counter value,
+you can use something like <code class="option">--event=RTC_INTERRUPTS:2048</code>. Note the last
+three values here are ignored.
+If OProfile is using timer-interrupt mode, there is no configuration possible.
+</p>
+            <p>
+The table below lists the events selected by default
+(<code class="option">--event=default</code>) for the various computer architectures:
+</p>
+            <div class="informaltable">
+              <table border="1">
+                <colgroup>
+                  <col />
+                  <col />
+                  <col />
+                </colgroup>
+                <tbody>
+                  <tr>
+                    <td>Processor</td>
+                    <td>cpu_type</td>
+                    <td>Default event</td>
+                  </tr>
+                  <tr>
+                    <td>Alpha EV4</td>
+                    <td>alpha/ev4</td>
+                    <td>CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Alpha EV5</td>
+                    <td>alpha/ev5</td>
+                    <td>CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Alpha PCA56</td>
+                    <td>alpha/pca56</td>
+                    <td>CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Alpha EV6</td>
+                    <td>alpha/ev6</td>
+                    <td>CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Alpha EV67</td>
+                    <td>alpha/ev67</td>
+                    <td>CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>ARM/XScale PMU1</td>
+                    <td>arm/xscale1</td>
+                    <td>CPU_CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>ARM/XScale PMU2</td>
+                    <td>arm/xscale2</td>
+                    <td>CPU_CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Athlon</td>
+                    <td>i386/athlon</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium Pro</td>
+                    <td>i386/ppro</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium II</td>
+                    <td>i386/pii</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium III</td>
+                    <td>i386/piii</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium M (P6 core)</td>
+                    <td>i386/p6_mobile</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium 4 (non-HT)</td>
+                    <td>i386/p4</td>
+                    <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Pentium 4 (HT)</td>
+                    <td>i386/p4-ht</td>
+                    <td>GLOBAL_POWER_EVENTS:100000:1:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Hammer</td>
+                    <td>x86-64/hammer</td>
+                    <td>CPU_CLK_UNHALTED:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Itanium</td>
+                    <td>ia64/itanium</td>
+                    <td>CPU_CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>Itanium 2</td>
+                    <td>ia64/itanium2</td>
+                    <td>CPU_CYCLES:100000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>TIMER_INT</td>
+                    <td>timer</td>
+                    <td>None selectable</td>
+                  </tr>
+                  <tr>
+                    <td>IBM iseries</td>
+                    <td>PowerPC 4/5/970</td>
+                    <td>CYCLES:10000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>IBM pseries</td>
+                    <td>PowerPC 4/5/970</td>
+                    <td>CYCLES:10000:0:1:1</td>
+                  </tr>
+                  <tr>
+                    <td>IBM s390</td>
+                    <td>timer</td>
+                    <td>None selectable</td>
+                  </tr>
+                  <tr>
+                    <td>IBM s390x</td>
+                    <td>timer</td>
+                    <td>None selectable</td>
+                  </tr>
+                </tbody>
+              </table>
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="oprofile-gui"></a>2. Using <span><strong class="command">oprof_start</strong></span></h2>
+              </div>
+            </div>
+          </div>
+          <p>
+The <span><strong class="command">oprof_start</strong></span> application provides a convenient way to start the profiler.
+Note that <span><strong class="command">oprof_start</strong></span> is just a wrapper around the <span><strong class="command">opcontrol</strong></span> script,
+so it does not provide more services than the script itself.
+</p>
+          <p>
+After <span><strong class="command">oprof_start</strong></span> is started you can select the event type for each counter;
+the sampling rate and other related parameters are explained in <a href="#controlling-daemon" title="1. Using opcontrol">Section 1, &#8220;Using <span><strong class="command">opcontrol</strong></span>&#8221;</a>.
+The "Configuration" section allows you to set general parameters such as the buffer size, kernel filename
+etc. The counter setup interface should be self-explanatory; <a href="#hardware-counters" title="3.1. Hardware performance counters">Section 3.1, &#8220;Hardware performance counters&#8221;</a> and related 
+links contain information on using unit masks.
+</p>
+          <p>
+A status line shows the current status of the profiler: how long it has been running, and the average
+number of interrupts received per second and the total, over all processors.
+Note that quitting <span><strong class="command">oprof_start</strong></span> does not stop the profiler.
+</p>
+          <p>
+Your configuration is saved in the same file as <span><strong class="command">opcontrol</strong></span> uses; that is,
+<code class="filename">~/.oprofile/daemonrc</code>.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="detailed-parameters"></a>3. Configuration details</h2>
+              </div>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="hardware-counters"></a>3.1. Hardware performance counters</h3>
+                </div>
+              </div>
+            </div>
+            <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+              <h3 class="title">Note</h3>
+              <p>
+Your CPU type may not include the requisite support for hardware performance counters, in which case
+you must use OProfile in RTC mode in 2.4 (see <a href="#rtc" title="3.2. OProfile in RTC mode">Section 3.2, &#8220;OProfile in RTC mode&#8221;</a>), or timer mode in 2.6 (see <a href="#timer" title="3.3. OProfile in timer interrupt mode">Section 3.3, &#8220;OProfile in timer interrupt mode&#8221;</a>). 
+You do not really need to read this section unless you are interested in using 
+events other than the default event chosen by OProfile.
+</p>
+            </div>
+            <p>
+The Intel hardware performance counters are detailed in the Intel IA-32 Architecture Manual, Volume 3, available
+from <a href="http://developer.intel.com/">http://developer.intel.com/</a>. The AMD Athlon/Duron
+implementation is detailed in <a href="http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf">
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf</a>.
+For PowerPC64 processors in IBM iSeries and pSeries systems, processor documentation
+is available at <a href="http://www-306.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC/">
+http://www-306.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC</a>.  (For example, the
+specific publication containing information on the performance monitor unit for the PowerPC970 is 
+"IBM PowerPC 970FX RISC Microprocessor User's Manual.")
+These processors are capable of delivering an interrupt when a counter overflows.
+This is the basic mechanism on which OProfile is based. The delivery mode is <span class="acronym">NMI</span>,
+so blocking interrupts in the kernel does not prevent profiling. When the interrupt handler is called,
+the current <span class="acronym">PC</span> value and the current task are recorded into the profiling structure.
+This allows the overflow event to be attached to a specific assembly instruction in a binary image.
+The daemon receives this data from the kernel, and writes it to the sample files.
+</p>
+            <p>
+If we use an event such as <code class="constant">CPU_CLK_UNHALTED</code> or <code class="constant">INST_RETIRED</code>
+(<code class="constant">GLOBAL_POWER_EVENTS</code> or <code class="constant">INSTR_RETIRED</code>, respectively, on the Pentium 4), we can
+use the overflow counts as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
+data such as the cache behaviour of routines with the other available counters.
+</p>
+            <p>
+However there are several caveats. First, there are those issues listed in the Intel manual. There is a delay
+between the counter overflow and the interrupt delivery that can skew results on a small scale - this means
+you cannot rely on the profiles at the instruction level as being perfectly accurate.
+If you are using an "event-mode" counter such as the cache counters, a count registered against it doesn't mean
+that it is responsible for that event. However, it implies that the counter overflowed in the dynamic
+vicinity of that instruction, to within a few instructions. Further details on this problem can be found in 
+<a href="#interpreting" title="Chapter 5. Interpreting profiling results">Chapter 5, <i>Interpreting profiling results</i></a> and also in the Digital paper "ProfileMe: A Hardware Performance Counter".
+</p>
+            <p>
+Each counter has several configuration parameters.
+First, there is the unit mask: this simply further specifies what to count.
+Second, there is the counter value, discussed below. Third, there is a parameter specifying whether to increment counts
+whilst in kernel or user space. You can configure these separately for each counter.
+</p>
+            <p>
+After each overflow event, the counter will be re-initialized
+such that another overflow will occur after this many events have been counted. Thus, higher
+values mean less-detailed profiling, and lower values mean more detail, but higher overhead.
+Picking a good value for this
+parameter is, unfortunately, somewhat of a black art. It is of course dependent on the event
+you have chosen.
+Specifying too large a value will mean not enough interrupts are generated
+to give a realistic profile (though this problem can be ameliorated by profiling for <span class="emphasis"><em>longer</em></span>).
+Specifying too small a value can lead to higher performance overhead.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="rtc"></a>3.2. OProfile in RTC mode</h3>
+                </div>
+              </div>
+            </div>
+            <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+              <h3 class="title">Note</h3>
+              <p>
+This section applies to 2.2/2.4 kernels only.
+</p>
+            </div>
+            <p>
+Some CPU types do not provide the needed hardware support to use the hardware performance counters. This includes
+some laptops, classic Pentiums, and other CPU types not yet supported by OProfile (such as Cyrix). 
+On these machines, OProfile falls
+back to using the real-time clock interrupt to collect samples. This interrupt is also used by the <span><strong class="command">rtc</strong></span>
+module: you cannot have both the OProfile and rtc modules loaded, nor can you use OProfile with rtc support compiled into the kernel.
+</p>
+            <p>
+RTC mode is less capable than the hardware counters mode; in particular, it is unable to profile sections of
+the kernel where interrupts are disabled. There is just one available event, "RTC interrupts", and its value 
+corresponds to the number of interrupts generated per second (that is, a higher number means a better profiling
+resolution, and higher overhead). The current implementation of the real-time clock supports only power-of-two
+sampling rates from 2 to 4096 per second.  Other values within this range are rounded to the nearest power of
+two.
+</p>
+            <p>
+Setting the value from the GUI should be straightforward. On the command line, you need to specify the
+event to <span><strong class="command">opcontrol</strong></span>, e.g. :
+</p>
+            <p>
+              <span>
+                <strong class="command">opcontrol --event=RTC_INTERRUPTS:256</strong>
+              </span>
+            </p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="timer"></a>3.3. OProfile in timer interrupt mode</h3>
+                </div>
+              </div>
+            </div>
+            <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+              <h3 class="title">Note</h3>
+              <p>
+This section applies to 2.6 kernels and above only.
+</p>
+            </div>
+            <p>
+In 2.6 kernels on CPUs without OProfile support for the hardware performance counters, the driver
+falls back to using the timer interrupt for profiling. Like the RTC mode in 2.4 kernels, this is not able to
+profile code that has interrupts disabled. Note that there are no configuration parameters for
+setting this, unlike the RTC and hardware performance counter setup.
+</p>
+            <p>
+You can force use of the timer interrupt by using the <code class="option">timer=1</code> module
+parameter (or <code class="option">oprofile.timer=1</code> on the boot command line if OProfile is
+built-in).
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="p4"></a>3.4. Pentium 4 support</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The Pentium 4 / Xeon performance counters are organized around 3 types of model specific registers (MSRs): 45 event
+selection control registers (ESCRs), 18 counter configuration control registers (CCCRs) and 18 counters. ESCRs describe a
+particular set of events which are to be recorded, and CCCRs bind ESCRs to counters and configure their
+operation. Unfortunately the relationship between these registers is quite complex; they cannot all be used with one
+another at any time. There is, however, a subset of 8 counters, 8 ESCRs, and 8 CCCRs which can be used independently of
+one another, so OProfile only accesses those registers, treating them as a bank of 8 "normal" counters, similar
+to those in the P6 or Athlon families of CPU.
+</p>
+            <p>
+There is currently no support for Precision Event-Based Sampling (PEBS), nor any advanced uses of the Debug Store
+(DS). Current support is limited to the conservative extension of OProfile's existing interrupt-based model described
+above.  Performance monitoring hardware on Pentium 4 / Xeon processors with Hyperthreading enabled (multiple logical
+processors on a single die) is not supported in 2.4 kernels (you can use OProfile if you disable hyper-threading,
+though).
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="ia64"></a>3.5. Intel Itanium 2 support</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The Itanium 2 performance monitoring unit (PMU) organizes the counters as four
+pairs of performance event monitoring registers. Each pair is composed of a
+Performance Monitoring Configuration (PMC) register and Performance Monitoring
+Data (PMD) register.  The PMC selects the performance event being monitored and
+the PMD determines the sampling interval. The IA64 Performance Monitoring Unit
+(PMU) triggers sampling with maskable interrupts. Thus, samples will not occur
+in sections of the IA64 kernel where interrupts are disabled.
+</p>
+            <p>
+None of the advanced features of the Itanium 2 performance monitoring unit
+such as opcode matching, address range matching, or precise event sampling are
+supported by this version of OProfile.  The Itanium 2 support only maps OProfile's
+existing interrupt-based model to the PMU hardware.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="ppc64"></a>3.6. PowerPC64 support</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The performance monitoring unit (PMU) for the PowerPC 64-bit processors 
+consists of between 6 and 8 counters (depending on the model), plus three
+special purpose registers used for programming the counters -- MMCR0, MMCR1,
+and MMCRA.  Advanced features such as instruction matching and thresholding are
+not supported by this version of OProfile.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="misuse"></a>3.7. Dangerous counter settings</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+OProfile is a low-level profiler which allows continuous profiling with a low-overhead cost.
+If too low a count reset value is set for a counter, the system can become overloaded with counter
+interrupts, and appear to have frozen. Whilst some validation is done, it
+is not foolproof.
+</p>
+            <div class="note" style="margin-left: 0.5in; margin-right: 0.5in;">
+              <h3 class="title">Note</h3>
+              <p>
+This can happen as follows: When the profiler count
+reaches zero an NMI handler is called which stores the sample values in an internal buffer, then resets the counter
+to its original value. If the count is very low, a pending NMI can be sent before the NMI handler has
+completed. Due to the priority of the NMI, the local APIC delivers the pending interrupt immediately after
+completion of the previous interrupt handler, and control never returns to other parts of the system.
+In this way the system seems to be frozen.
+</p>
+            </div>
+            <p>If this happens, it will be impossible to bring the system back to a workable state.
+There is no way to provide real security against this happening, other than making sure to use a reasonable value
+for the counter reset. For example, setting <code class="constant">CPU_CLK_UNHALTED</code> event type with a ridiculously low reset count (e.g. 500)
+is likely to freeze the system.
+</p>
+            <p>
+In short : <span><strong class="command">Don't try a foolish sample count value</strong></span>. Unfortunately the definition of a foolish value
+is really dependent on the event type - if ever in doubt, e-mail </p>
+            <div class="address">
+              <p><code class="email">&lt;<a href="mailto:oprofile-list@lists.sf.net">oprofile-list@lists.sf.net</a>&gt;</code>.</p>
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="results"></a>Chapter 4. Obtaining results</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#profile-spec">1. Profile specifications</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#profile-spec-examples">1.1. Examples</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#profile-spec-details">1.2. Profile specification parameters</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#id2682936">1.3. Locating and managing binary images</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#no-results">1.4. What to do when you don't get any results</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#opreport">2. Image summaries and symbol summaries (<span><strong class="command">opreport</strong></span>)</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-merging">2.1. Merging separate profiles</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-comparison">2.2. Side-by-side multiple results</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-callgraph">2.3. Callgraph output</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-diff">2.4. Differential profiles with <span><strong class="command">opreport</strong></span></a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-anon">2.5. Anonymous executable mappings</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opreport-options">2.6. Options for <span><strong class="command">opreport</strong></span></a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#opannotate">3. Outputting annotated source (<span><strong class="command">opannotate</strong></span>)</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opannotate-finding-source">3.1. Locating source files</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opannotate-details">3.2. Usage of <span><strong class="command">opannotate</strong></span></a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#opgprof">4. <span><strong class="command">gprof</strong></span>-compatible output (<span><strong class="command">opgprof</strong></span>)</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#opgprof-details">4.1. Usage of <span><strong class="command">opgprof</strong></span></a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#oparchive">5. Archiving measurements (<span><strong class="command">oparchive</strong></span>)</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#oparchive-details">5.1. Usage of <span><strong class="command">oparchive</strong></span></a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+          </dl>
+        </div>
+        <p>
+OK, so the profiler has been running, but it's not much use unless we can get some data out. Fairly often,
+OProfile does a little <span class="emphasis"><em>too</em></span> good a job of keeping overhead low, and no data reaches
+the profiler. This can happen on lightly-loaded machines. Remember that you can force a dump at any time with:
+</p>
+        <p>
+          <span>
+            <strong class="command">opcontrol --dump</strong>
+          </span>
+        </p>
+        <p>Remember to do this before complaining that there is no profiling data!
+Now that we've got some data, it has to be processed. That's the job of <span><strong class="command">opreport</strong></span>,
+<span><strong class="command">opannotate</strong></span>, or <span><strong class="command">opgprof</strong></span>.
+</p>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="profile-spec"></a>1. Profile specifications</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+All of the analysis tools take a <span class="emphasis"><em>profile specification</em></span>.
+This is a set of definitions that describe which actual profiles should be
+examined. The simplest profile specification is empty: this will match all
+the available profile files for the current session (this is what happens
+when you do <span><strong class="command">opreport</strong></span>).
+</p>
+          <p>
+Specification parameters are of the form <code class="option">name:value[,value]</code>.
+For example, if I wanted to get a combined symbol summary for
+<code class="filename">/bin/myprog</code> and <code class="filename">/bin/myprog2</code>,
+I could do <span><strong class="command">opreport -l image:/bin/myprog,/bin/myprog2</strong></span>.
+As a special case, you don't actually need to specify the <code class="option">image:</code>
+part here: anything left on the command line is assumed to be an
+<code class="option">image:</code> name. Similarly, if no <code class="option">session:</code>
+is specified, then <code class="option">session:current</code> is assumed ("current"
+is a special name for the current or most recent profiling session).
+</p>
+          <p>
+In addition to the comma-separated list shown above, some of the 
+specification parameters can take <span><strong class="command">glob</strong></span>-style
+values. For example, if I want to see image summaries for all
+binaries profiled in <code class="filename">/usr/bin/</code>, I could do
+<span><strong class="command">opreport image:/usr/bin/\*</strong></span>. Note the necessity
+to escape the special character from the shell.
+</p>
+          <p>
+For <span><strong class="command">opreport</strong></span>, profile specifications can be used to
+define two profiles, giving differential output. This is done by
+enclosing each of the two specifications within curly braces, as shown
+in the examples below. Any specifications outside of curly braces are
+shared across both.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="profile-spec-examples"></a>1.1. Examples</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Image summaries for all profiles with <code class="constant">DATA_MEM_REFS</code>
+samples in the saved session called "stresstest":
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+# opreport session:stresstest event:DATA_MEM_REFS
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Symbol summary for the application called "test_sym53c8xx,9xx". Note the
+escaping is necessary as <code class="option">image:</code> takes a comma-separated list.
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+# opreport -l ./test/test_sym53c8xx\,9xx
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Image summaries for all binaries in the <code class="filename">test</code> directory,
+except <code class="filename">boring-test</code>:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+# opreport image:./test/\* image-exclude:./test/boring-test
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Differential profile of a binary stored in two archives:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+# opreport -l /bin/bash { archive:./orig } { archive:./new }
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Differential profile of an archived binary with the current session:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+# opreport -l /bin/bash { archive:./orig } { }
+</pre>
+                </td>
+              </tr>
+            </table>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="profile-spec-details"></a>1.2. Profile specification parameters</h3>
+                </div>
+              </div>
+            </div>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">
+                    <code class="option">archive:</code>
+                    <span class="emphasis">
+                      <em>archivepath</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		A path to an archive made with <span><strong class="command">oparchive</strong></span>.
+		Absence of this tag, unlike others, means "the current system",
+		equivalent to specifying "archive:".
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">session:</code>
+                    <span class="emphasis">
+                      <em>sessionlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		A comma-separated list of session names to resolve in. Absence of this
+		tag, unlike others, means "the current session", equivalent to
+		specifying "session:current".
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">session-exclude:</code>
+                    <span class="emphasis">
+                      <em>sessionlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+                A comma-separated list of sessions to exclude.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">image:</code>
+                    <span class="emphasis">
+                      <em>imagelist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+                A comma-separated list of image names to resolve. Each entry may be a relative
+                path, a <span><strong class="command">glob</strong></span>-style name, or a full path, e.g.</p>
+                  <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+                    <tr>
+                      <td>
+                        <pre class="screen">opreport 'image:/usr/bin/oprofiled,*op*,./opreport'</pre>
+                      </td>
+                    </tr>
+                  </table>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">image-exclude:</code>
+                    <span class="emphasis">
+                      <em>imagelist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Same as <code class="option">image:</code>, but the matching images are excluded.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">lib-image:</code>
+                    <span class="emphasis">
+                      <em>imagelist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Same as <code class="option">image:</code>, but only for images that are for
+		a particular primary binary image (namely, an application). This only
+		makes sense to use if you're using <code class="option">--separate</code>.
+		This includes kernel modules and the kernel when using
+		<code class="option">--separate=kernel</code>.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">lib-image-exclude:</code>
+                    <span class="emphasis">
+                      <em>imagelist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Same as <code class="option">lib-image:</code>, but the matching images
+		are excluded.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">event:</code>
+                    <span class="emphasis">
+                      <em>eventlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		The symbolic event name to match on, e.g. <code class="option">event:DATA_MEM_REFS</code>.
+		You can pass a list of events for side-by-side comparison with <span><strong class="command">opreport</strong></span>.
+		When using the timer interrupt, the event is always "TIMER".
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">count:</code>
+                    <span class="emphasis">
+                      <em>eventcountlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		The event count to match on, e.g. <code class="option">event:DATA_MEM_REFS count:30000</code>.
+		Note that this value refers to the setting used for <span><strong class="command">opcontrol</strong></span>
+		only, and has nothing to do with the sample counts in the profile data
+		itself.
+		You can pass a list of event counts for side-by-side comparison with <span><strong class="command">opreport</strong></span>.
+		When using the timer interrupt, the count is always 0 (indicating it cannot be set).
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">unit-mask:</code>
+                    <span class="emphasis">
+                      <em>masklist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		The unit mask value of the event to match on, e.g. <code class="option">unit-mask:1</code>.
+		You can pass a list of unit masks for side-by-side comparison with <span><strong class="command">opreport</strong></span>.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">cpu:</code>
+                    <span class="emphasis">
+                      <em>cpulist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Only consider profiles for the given numbered CPU (starting from zero).
+		This is only useful when using CPU profile separation.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">tgid:</code>
+                    <span class="emphasis">
+                      <em>pidlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Only consider profiles for the given task groups. Unless some program
+		is using threads, the task group ID of a process is the same
+		as its process ID. This option corresponds to the POSIX
+		notion of a thread group.
+		This is only useful when using per-process profile separation.
+		</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">tid:</code>
+                    <span class="emphasis">
+                      <em>tidlist</em>
+                    </span>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+		Only consider profiles for the given threads. When using
+		recent thread libraries, all threads in a process share the
+		same task group ID, but have different thread IDs. You can
+		use this option in combination with <code class="option">tgid:</code> to
+		restrict the results to particular threads within a process.
+		This is only useful when using per-process profile separation.
+		</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="id2682936"></a>1.3. Locating and managing binary images</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Each session's sample files can be found in the <code class="filename">/var/lib/oprofile/samples/</code> directory.
+These are used, along with the binary image files, to produce human-readable data.
+In some circumstances (kernel modules in an initrd, or modules on 2.6 kernels), OProfile
+will not be able to find the binary images. All the tools have an <code class="option">--image-path</code>
+option to which you can pass a comma-separated list of alternate paths to search. For example,
+I can let OProfile find my 2.6 modules by using <span><strong class="command">--image-path /lib/modules/2.6.0/kernel/</strong></span>.
+It is your responsibility to ensure that the correct images are found when using this
+option.
+</p>
+            <p>
+Note that if a binary image changes after the sample file was created, you won't be able to get useful
+symbol-based data out. This situation is detected for you. If you replace a binary, you should
+make sure to save the old binary if you need to do comparative profiles.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="no-results"></a>1.4. What to do when you don't get any results</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+When attempting to get output, you may see the error:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+error: no sample files found: profile specification too strict ?
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+What this is saying is that the profile specification you passed in,
+when matched against the available sample files, resulted in no matches.
+There are a number of reasons this might happen:
+</p>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">spelling</span>
+                </dt>
+                <dd>
+                  <p>
+You specified a binary name, but spelt it wrongly. Check your spelling!
+</p>
+                </dd>
+                <dt>
+                  <span class="term">profiler wasn't running</span>
+                </dt>
+                <dd>
+                  <p>
+Make very sure that OProfile was actually up and running when you ran
+the binary.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">binary didn't run long enough</span>
+                </dt>
+                <dd>
+                  <p>
+Remember OProfile is a statistical profiler - you're not guaranteed to
+get samples for short-running programs. You can help this by using a
+lower count for the performance counter, so there are a lot more samples
+taken per second.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">binary spent most of its time in libraries</span>
+                </dt>
+                <dd>
+                  <p>
+Similarly, if the binary spends little time in the main binary image
+itself, with most of it spent in shared libraries it uses, you might
+not see any samples for the binary image itself. You can check this
+by using <span><strong class="command">opcontrol --separate=lib</strong></span> before the
+profiling session, so <span><strong class="command">opreport</strong></span> and friends show
+the library profiles on a per-application basis.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">specification was really too strict</span>
+                </dt>
+                <dd>
+                  <p>
+For example, you specified something like <code class="option">tgid:3433</code>,
+but no task with that group ID ever ran the code.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">binary didn't generate any events</span>
+                </dt>
+                <dd>
+                  <p>
+If you're using a particular event counter, for example counting MMX
+operations, the code might simply have not generated any events in the
+first place. Verify the code you're profiling does what you expect it
+to.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">you didn't specify kernel module name correctly</span>
+                </dt>
+                <dd>
+                  <p>
+If you're using 2.6 kernels, and trying to get reports for a kernel
+module, make sure to use the <code class="option">-p</code> option, and specify the
+module name <span class="emphasis"><em>with</em></span> the <code class="filename">.ko</code>
+extension. Check if the module is one loaded from initrd.
+</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="opreport"></a>2. Image summaries and symbol summaries (<span><strong class="command">opreport</strong></span>)</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+The <span><strong class="command">opreport</strong></span> utility is the primary utility you will use for 
+getting formatted data out of OProfile. It produces two types of data: image summaries
+and symbol summaries. An image summary lists the number of samples for individual
+binary images such as libraries or applications. Symbol summaries provide per-symbol
+profile data. In the following example, we're getting an image summary for the whole
+system:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opreport --long-filenames
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
+   905898 59.7415 /usr/lib/gcc-lib/i386-redhat-linux/3.2/cc1plus
+   214320 14.1338 /boot/2.6.0/vmlinux
+   103450  6.8222 /lib/i686/libc-2.3.2.so
+    60160  3.9674 /usr/local/bin/madplay
+    31769  2.0951 /usr/local/oprofile-pp/bin/oprofiled
+    26550  1.7509 /usr/lib/libartsflow.so.1.0.0
+    23906  1.5765 /usr/bin/as
+    18770  1.2378 /oprofile
+    15528  1.0240 /usr/lib/qt-3.0.5/lib/libqt-mt.so.3.0.5
+    11979  0.7900 /usr/X11R6/bin/XFree86
+    11328  0.7471 /bin/bash
+    ...
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+If we had specified <code class="option">--symbols</code> in the previous command, we would have
+gotten a symbol summary of all the images across the entire system. We can restrict this to only
+part of the system profile; for example,
+below is a symbol summary of the OProfile daemon. Note that as we used
+<span><strong class="command">opcontrol --separate=kernel</strong></span>, symbols from images that <span><strong class="command">oprofiled</strong></span>
+has used are also shown.
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opreport -l `which oprofiled` 2&gt;/dev/null | more
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
+vma      samples  %           image name               symbol name
+0804be10 14971    28.1993     oprofiled                odb_insert
+0804afdc 7144     13.4564     oprofiled                pop_buffer_value
+c01daea0 6113     11.5144     vmlinux                  __copy_to_user_ll
+0804b060 2816      5.3042     oprofiled                opd_put_sample
+0804b4a0 2147      4.0441     oprofiled                opd_process_samples
+0804acf4 1855      3.4941     oprofiled                opd_put_image_sample
+0804ad84 1766      3.3264     oprofiled                opd_find_image
+0804a5ec 1084      2.0418     oprofiled                opd_find_module
+0804ba5c 741       1.3957     oprofiled                odb_hash_add_node
+...
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+These are the two basic ways you are most likely to use regularly, but <span><strong class="command">opreport</strong></span>
+can do a lot more than that, as described below.
+</p>
+          <div class="sect2" lang="en" xml:lang="en"><div class="titlepage"><div><div><h3 class="title"><a id="opreport-merging"></a>2.1. Merging separate profiles</h3></div></div></div>
+
+<p>If you have used one of the <code class="option">--separate=</code> options
+whilst profiling, there can be several separate profiles for
+a single binary image within a session. Normally the output
+will keep these images separated (so, for example, the image summary
+output shows library image summaries on a per-application basis,
+when using <code class="option">--separate=lib</code>).
+Sometimes it can be useful to merge these results back together
+before getting results. The <code class="option">--merge</code> option allows
+you to do that.
+</p>
+</div>
+          <div class="sect2" lang="en" xml:lang="en"><div class="titlepage"><div><div><h3 class="title"><a id="opreport-comparison"></a>2.2. Side-by-side multiple results</h3></div></div></div>
+<p>If you have used multiple events when profiling, by default you get
+side-by-side results of each event's sample values from <span><strong class="command">opreport</strong></span>.
+You can restrict which events to list by appropriate use of the
+<code class="option">event:</code> profile specifications, etc.
+</p>
+</div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opreport-callgraph"></a>2.3. Callgraph output</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+When using the <code class="option">opcontrol --callgraph</code> option, you can see what
+functions are calling other functions in the output. Consider the
+following program:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+#include &lt;string.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;stdio.h&gt;
+
+#define SIZE 500000
+
+static int compare(const void *s1, const void *s2)
+{
+        return strcmp(s1, s2);
+}
+
+static void repeat(void)
+{
+        int i;
+        char *strings[SIZE];
+        char str[] = "abcdefghijklmnopqrstuvwxyz";
+
+        for (i = 0; i &lt; SIZE; ++i) {
+                strings[i] = strdup(str);
+                strfry(strings[i]);
+        }
+
+        qsort(strings, SIZE, sizeof(char *), compare);
+}
+
+int main()
+{
+        while (1)
+                repeat();
+}
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+When running with the call-graph option, OProfile will
+record the function stack every time it takes a sample.
+<span><strong class="command">opreport --callgraph</strong></span> outputs an entry for each
+function, where each entry looks similar to:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+samples  %        image name               symbol name
+  197       0.1548  cg                       main
+  127036   99.8452  cg                       repeat
+84590    42.5084  libc-2.3.2.so            strfry
+  84590    66.4838  libc-2.3.2.so            strfry [self]
+  39169    30.7850  libc-2.3.2.so            random_r
+  3475      2.7312  libc-2.3.2.so            __i686.get_pc_thunk.bx
+-------------------------------------------------------------------------------
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Here the non-indented line is the function we're focussing upon
+(<code class="function">strfry()</code>). This
+line is the same as you'd get from a normal <span><strong class="command">opreport</strong></span>
+output.
+</p>
+            <p>
+Above the non-indented line we find the functions that called this
+function (for example, <code class="function">repeat()</code> calls
+<code class="function">strfry()</code>). The samples and percentage values here
+refer to the number of times we took a sample where this call was found
+in the stack; the percentage is relative to all other callers of the
+function we're focussing on. Note that these values are
+<span class="emphasis"><em>not</em></span> call counts; they only reflect the call stack
+every time a sample is taken; that is, if a call is found in the stack
+at the time of a sample, it is recorded in this count.
+</p>
+            <p>
+Below the line are functions that are called by
+<code class="function">strfry()</code> (called <span class="emphasis"><em>callees</em></span>).
+It's clear here that <code class="function">strfry()</code> calls
+<code class="function">random_r()</code>. We also see a special entry with a
+"[self]" marker. This records the normal samples for the function, but
+the percentage becomes relative to all callees. This allows you to
+compare the time spent in the function itself with the time spent in functions it
+calls. Note that if a function calls itself, then it will appear in the
+list of callees of itself, but without the "[self]" marker; so recursive
+calls are still clearly separable.
+</p>
+            <p>
+You may have noticed that the output lists <code class="function">main()</code>
+as calling <code class="function">strfry()</code>, but it's clear from the source
+that this doesn't actually happen. See <a href="#interpreting-callgraph" title="3. Interpreting call-graph profiles">Section 3, &#8220;Interpreting call-graph profiles&#8221;</a> for an explanation.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opreport-diff"></a>2.4. Differential profiles with <span><strong class="command">opreport</strong></span></h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Often, we'd like to be able to compare two profiles. For example, when
+analysing the performance of an application, we'd like to make code
+changes and examine the effect of the change. This is supported in
+<span><strong class="command">opreport</strong></span> by giving a profile specification that
+identifies two different profiles. The general form is:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+$ opreport &lt;shared-spec&gt; { &lt;first-profile&gt; } { &lt;second-profile&gt; }
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+For each of the profiles, the shared section is prefixed, and then the
+specification is analysed. The usual parameters work both within the
+shared section, and in the sub-specification within the curly braces.
+</p>
+            <p>
+A typical way to use this feature is with archives created with
+<span><strong class="command">oparchive</strong></span>. Let's look at an example:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+$ ./a
+$ oparchive -o orig ./a
+$ opcontrol --reset
+  # edit and recompile a
+$ ./a
+  # now compare the current profile of a with the archived profile
+$ opreport -xl ./a { archive:./orig } { }
+CPU: PIII, speed 863.233 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a
+unit mask of 0x00 (No unit mask) count 100000
+samples  %        diff %    symbol name
+92435    48.5366  +0.4999   a
+54226    ---      ---       c
+49222    25.8459  +++       d
+48787    25.6175  -2.2e-01  b
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Note that we specified an empty second profile in the curly braces, as
+we wanted to use the current session; alternatively, we could
+have specified another archive, or a tgid etc. We specified the binary
+<span><strong class="command">a</strong></span> in the shared section, so we matched that in both
+the profiles we're diffing.
+</p>
+            <p>
+As in the normal output, the results are sorted by the number of
+samples, and the percentage field represents the relative percentage of
+the symbol's samples in the second profile.
+</p>
+            <p>
+Notice the new column in the output. This value represents the
+percentage change of the relative percent between the first and the
+second profile: roughly, "how much more important this symbol is".
+Looking at the symbol <code class="function">a()</code>, we can see that it took
+roughly the same amount of the total profile in both the first and the
+second profile. The function <code class="function">c()</code> was not in the new
+profile, so has been marked with <code class="function">---</code>. Note that the
+sample value is the number of samples in the first profile; since we're
+displaying results for the second profile, we don't list a percentage
+value for it, as it would be meaningless. <code class="function">d()</code> is
+new in the second profile, and consequently marked with
+<code class="function">+++</code>.
+</p>
+            <p>
+When comparing profiles between different binaries, it should be clear
+that functions can change in terms of VMA and size. To avoid this
+problem, <span><strong class="command">opreport</strong></span> considers a symbol to be the same
+if the symbol name, image name, and owning application name all match;
+any other factors are ignored. Note that the check for application name
+means that trying to compare library profiles between two different
+applications will not work as you might expect: each symbol will be
+considered different.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opreport-anon"></a>2.5. Anonymous executable mappings</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Many applications, typically ones involving dynamic compilation into
+machine code, have executable mappings that are not backed by an ELF
+file. <span><strong class="command">opreport</strong></span> has basic support for showing the
+samples taken in these regions; for example:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+$ opreport /usr/jre1.5.0/bin/java
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 100000
+CPU_CLK_UNHALT...|
+  samples|      %|
+------------------
+    27344 100.000 java
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+            27236  99.605 anon (tgid:12135 range:0xb2cb8000-0xb2e80000)
+              108  0.3949 java
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Currently, there is no support for getting symbol-based summaries for
+such regions. Note that, since such mappings are dependent upon
+individual invocations of a binary, these mappings are always listed as
+a dependent image, even when using <code class="option">--separate=none</code>.
+Equally, the results are not affected by the <code class="option">--merge</code>
+option.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opreport-options"></a>2.6. Options for <span><strong class="command">opreport</strong></span></h3>
+                </div>
+              </div>
+            </div>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">
+                    <code class="option">--accumulated / -a</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Accumulate sample and percentage counts in the symbol list.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--callgraph / -c</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show callgraph information.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--debug-info / -g</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show source file and line for each symbol.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--demangle / -D none|normal|smart</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--details / -d</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show per-instruction details for all selected symbols. Note that, for
+binaries without symbol information, the VMA values shown are raw file
+offsets for the image binary.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-dependent / -x</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-symbols / -e [symbols]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Exclude all the symbols in the given comma-separated list.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--global-percent / -%</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Make all percentages relative to the whole profile.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--help / -? / --usage</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show help message.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--image-path / -p [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--include-symbols / -i [symbols]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only include symbols in the given comma-separated list.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--long-filenames / -f</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output full paths instead of basenames.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--merge / -m [lib,cpu,tid,tgid,unitmask,all]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Merge any profiles separated in a --separate session.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--no-header</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Don't output a header detailing profiling parameters.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--output-file / -o [file]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output to the given file instead of stdout.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--reverse-sort / -r</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Reverse the sort from the default.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--show-address / -w</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show the VMA address of each symbol (off by default).
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--sort / -s [vma,sample,symbol,debug,image]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Sort the list of symbols by, respectively, symbol address,
+number of samples, symbol name, debug filename and line number,
+binary image filename.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--symbols / -l</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+List per-symbol information instead of a binary image summary.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--threshold / -t [percentage]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--verbose / -V [options]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Give verbose debugging output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--version / -v</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show version.
+</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="opannotate"></a>3. Outputting annotated source (<span><strong class="command">opannotate</strong></span>)</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+The <span><strong class="command">opannotate</strong></span> utility generates annotated source files or assembly listings, optionally
+mixed with source.
+If you want to see the source file, the profiled application needs to have debug information, and the source
+must be available through this debug information. For GCC, you must use the <code class="option">-g</code> option
+when you are compiling.
+If the binary doesn't contain sufficient debug information, you can still
+use <span><strong class="command">opannotate <code class="option">--assembly</code></strong></span> to get annotated assembly.
+</p>
+          <p>
+Note that for the reason explained in <a href="#hardware-counters" title="3.1. Hardware performance counters">Section 3.1, &#8220;Hardware performance counters&#8221;</a> the results can be
+inaccurate. The debug information itself can add other problems; for example, the line number for a symbol can be
+incorrect. Assembly instructions can be re-ordered and moved by the compiler, and this can lead to
+crediting source lines with samples not really "owned" by this line. Also see
+<a href="#interpreting" title="Chapter 5. Interpreting profiling results">Chapter 5, <i>Interpreting profiling results</i></a>.
+</p>
+          <p>
+You can output the annotation to one single file, containing all the source found, using the
+<code class="option">--source</code> option. You can use this in conjunction with <code class="option">--assembly</code>
+to get combined source/assembly output.
+</p>
+          <p>
+You can also output a directory of annotated source files that maintains the structure of
+the original sources. Each line in the annotated source is prepended with the samples
+for that line. Additionally, each symbol is annotated giving details for the symbol
+as a whole. An example:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opannotate --source --output-dir=annotated /usr/local/oprofile-pp/bin/oprofiled
+$ ls annotated/home/moz/src/oprofile-pp/daemon/
+opd_cookie.h  opd_image.c  opd_kernel.c  opd_sample_files.c  oprofiled.c
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Line numbers are maintained in the source files, but each file has
+a footer appended describing the profiling details. The actual annotation
+looks something like this:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+...
+               :static uint64_t pop_buffer_value(struct transient * trans)
+ 11510  1.9661 :{ /* pop_buffer_value total:  89901 15.3566 */
+               :        uint64_t val;
+               :
+ 10227  1.7469 :        if (!trans-&gt;remaining) {
+               :                fprintf(stderr, "BUG: popping empty buffer !\n");
+               :                exit(EXIT_FAILURE);
+               :        }
+               :
+               :        val = get_buffer_value(trans-&gt;buffer, 0);
+  2281  0.3896 :        trans-&gt;remaining--;
+  2296  0.3922 :        trans-&gt;buffer += kernel_pointer_size;
+               :        return val;
+ 10454  1.7857 :}
+...
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+The first number on each line is the number of samples, whilst the second is
+the relative percentage of total samples.
+</p>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opannotate-finding-source"></a>3.1. Locating source files</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Of course, <span><strong class="command">opannotate</strong></span> needs to be able to locate the source files
+for the binary image(s) in order to produce output. Some binary images have debug
+information where the given source file paths are relative, not absolute. You can
+specify search paths to look for these files (similar to <span><strong class="command">gdb</strong></span>'s
+<code class="option">dir</code> command) with the <code class="option">--search-dirs</code> option.
+</p>
+            <p>
+Sometimes you may have a binary image which gives absolute paths for the source files,
+but you have the actual sources elsewhere (commonly, you've installed an SRPM for
+a binary on your system and you want annotation from an existing profile). You can
+use the <code class="option">--base-dirs</code> option to redirect OProfile to look somewhere
+else for source files. For example, imagine we have a binary generated from a source
+file that is given in the debug information as <code class="filename">/tmp/build/libfoo/foo.c</code>,
+and you have the source tree matching that binary installed in <code class="filename">/home/user/libfoo/</code>.
+You can redirect OProfile to find <code class="filename">foo.c</code> correctly like this:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+$ opannotate --source --base-dirs=/tmp/build/libfoo/ --search-dirs=/home/user/libfoo/ --output-dir=annotated/ /lib/libfoo.so
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+You can specify multiple (comma-separated) paths to both options.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opannotate-details"></a>3.2. Usage of <span><strong class="command">opannotate</strong></span></h3>
+                </div>
+              </div>
+            </div>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">
+                    <code class="option">--assembly / -a</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output annotated assembly. If this is combined with --source, then mixed
+source / assembly annotations are output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--base-dirs / -b [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of path prefixes. This can be used to point OProfile to a
+different location for source files when the debug information specifies an
+absolute path on your system for the source that does not exist. The prefix
+is stripped from the debug source file paths, then searched in the search dirs
+specified by <code class="option">--search-dirs</code>.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--demangle / -D none|normal|smart</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-dependent / -x</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-file [files]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Exclude all files in the given comma-separated list of glob patterns.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-symbols / -e [symbols]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Exclude all the symbols in the given comma-separated list.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--help / -? / --usage</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show help message.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--image-path / -p [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--include-file [files]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only include files in the given comma-separated list of glob patterns.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--include-symbols / -i [symbols]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only include symbols in the given comma-separated list.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--objdump-params [params]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Pass the given parameters as extra values when calling objdump.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--output-dir / -o [dir]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output directory. This makes opannotate output one annotated file for each
+source file. This option can't be used in conjunction with --assembly.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--search-dirs / -d [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of paths to search for source files. This is useful to find
+source files when the debug information only contains relative paths.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--source / -s</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output annotated source. This requires debugging information to be available
+for the binaries.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--threshold / -t [percentage]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--verbose / -V [options]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Give verbose debugging output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--version / -v</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show version.
+</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="opgprof"></a>4. <span><strong class="command">gprof</strong></span>-compatible output (<span><strong class="command">opgprof</strong></span>)</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+If you're familiar with the output produced by <span><strong class="command">GNU gprof</strong></span>,
+you may find <span><strong class="command">opgprof</strong></span> useful. It takes a single binary
+as an argument, and produces a <code class="filename">gmon.out</code> file for use
+with <span><strong class="command">gprof -p</strong></span>. If call-graph profiling is enabled,
+then this is also included.
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opgprof `which oprofiled` # generates gmon.out file
+$ gprof -p `which oprofiled` | head
+Flat profile:
+
+Each sample counts as 1 samples.
+  %   cumulative   self              self     total
+ time   samples   samples    calls  T1/call  T1/call  name
+ 33.13 206237.00 206237.00                             odb_insert
+ 22.67 347386.00 141149.00                             pop_buffer_value
+  9.56 406881.00 59495.00                             opd_put_sample
+  7.34 452599.00 45718.00                             opd_find_image
+  7.19 497327.00 44728.00                             opd_process_samples
+</pre>
+              </td>
+            </tr>
+          </table>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="opgprof-details"></a>4.1. Usage of <span><strong class="command">opgprof</strong></span></h3>
+                </div>
+              </div>
+            </div>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">
+                    <code class="option">--help / -? / --usage</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show help message.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--image-path / -p [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--output-filename / -o [file]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output to the given file instead of the default, gmon.out.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--threshold / -t [percentage]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--verbose / -V [options]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Give verbose debugging output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--version / -v</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show version.
+</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="oparchive"></a>5. Archiving measurements (<span><strong class="command">oparchive</strong></span>)</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+	The <span><strong class="command">oparchive</strong></span> utility generates a directory populated
+	with executable, debug, and oprofile sample files. This directory can be
+	moved to another machine via <span><strong class="command">tar</strong></span> and analyzed without
+	further use of the data collection machine.
+</p>
+          <p>
+	The following command would collect the sample files, the executables
+	associated with the sample files, and the debuginfo files associated
+	with the executables and copy them into
+	<code class="filename">/tmp/current_data</code>:
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+# oparchive -o /tmp/current_data
+</pre>
+              </td>
+            </tr>
+          </table>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="oparchive-details"></a>5.1. Usage of <span><strong class="command">oparchive</strong></span></h3>
+                </div>
+              </div>
+            </div>
+            <div class="variablelist">
+              <dl>
+                <dt>
+                  <span class="term">
+                    <code class="option">--help / -? / --usage</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show help message.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--exclude-dependent / -x</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--image-path / -p [paths]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--output-directory / -o [directory]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Output to the given directory. There is no default. This must be specified.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--verbose / -V [options]</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Give verbose debugging output.
+</p>
+                </dd>
+                <dt>
+                  <span class="term">
+                    <code class="option">--version / -v</code>
+                  </span>
+                </dt>
+                <dd>
+                  <p>
+Show version.
+</p>
+                </dd>
+              </dl>
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="interpreting"></a>Chapter 5. Interpreting profiling results</h2>
+            </div>
+          </div>
+        </div>
+        <div class="toc">
+          <p>
+            <b>Table of Contents</b>
+          </p>
+          <dl>
+            <dt>
+              <span class="sect1">
+                <a href="#irq-latency">1. Profiling interrupt latency</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#kernel-profiling">2. Kernel profiling</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#irq-masking">2.1. Interrupt masking</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#idle">2.2. Idle time</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#kernel-modules">2.3. Profiling kernel modules</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#interpreting-callgraph">3. Interpreting call-graph profiles</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#debug-info">4. Inaccuracies in annotated source</a>
+              </span>
+            </dt>
+            <dd>
+              <dl>
+                <dt>
+                  <span class="sect2">
+                    <a href="#effect-of-optimizations">4.1. Side effects of optimizations</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#prologues">4.2. Prologues and epilogues</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#inlined-function">4.3. Inlined functions</a>
+                  </span>
+                </dt>
+                <dt>
+                  <span class="sect2">
+                    <a href="#wrong-linenr-info">4.4. Inaccuracy in line number information</a>
+                  </span>
+                </dt>
+              </dl>
+            </dd>
+            <dt>
+              <span class="sect1">
+                <a href="#symbol-without-debug-info">5. Assembly functions</a>
+              </span>
+            </dt>
+            <dt>
+              <span class="sect1">
+                <a href="#hidden-cost">6. Other discrepancies</a>
+              </span>
+            </dt>
+          </dl>
+        </div>
+        <p>
+The standard caveats of profiling apply in interpreting the results from OProfile:
+profile realistic situations, profile different scenarios, profile
+for as long a time as possible, avoid system-specific artifacts, don't trust
+the profile data too much. Also bear in mind the comments on the performance
+counters above - you <span class="emphasis"><em>cannot</em></span> rely on totally accurate
+instruction-level profiling.  However, for almost all circumstances the data
+can be useful. Ideally a utility such as Intel's VTUNE would be available to
+allow careful instruction-level analysis; go hassle Intel for this, not me ;)
+</p>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="irq-latency"></a>1. Profiling interrupt latency</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+This is an example of how the latency of delivery of profiling interrupts
+can impact the reliability of the profiling data. This is pretty much a 
+worst-case-scenario example: these problems are fairly rare.
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+double fun(double a, double b, double c)
+{
+ double result = 0;
+ for (int i = 0 ; i &lt; 10000; ++i) {
+  result += a;
+  result *= b;
+  result /= c;
+ }
+ return result;
+}
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Here the last instruction of the loop is very costly, and you would expect the result
+to reflect that - but (cutting the instructions inside the loop):
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opannotate -a -t 10 ./a.out
+
+     88 15.38% : 8048337:       fadd   %st(3),%st
+     48 8.391% : 8048339:       fmul   %st(2),%st
+     68 11.88% : 804833b:       fdiv   %st(1),%st
+    368 64.33% : 804833d:       inc    %eax
+               : 804833e:       cmp    $0x270f,%eax
+               : 8048343:       jle    8048337
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+The problem comes from the x86 hardware; when the counter overflows the IRQ
+is asserted but the hardware has features that can delay the NMI interrupt:
+x86 hardware is synchronous (i.e. cannot interrupt during an instruction);
+there is also a latency when the IRQ is asserted, and the multiple
+execution units and the out-of-order model of modern x86 CPUs also cause
+problems. This is the same function, with annotation :
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+$ opannotate -s -t 10 ./a.out
+
+               :double fun(double a, double b, double c)
+               :{ /* _Z3funddd total:     572 100.0% */
+               : double result = 0;
+    368 64.33% : for (int i = 0 ; i &lt; 10000; ++i) {
+     88 15.38% :  result += a;
+     48 8.391% :  result *= b;
+     68 11.88% :  result /= c;
+               : }
+               : return result;
+               :}
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+The conclusion: don't trust samples coming at the end of a loop,
+particularly if the last instruction generated by the compiler is costly. This
+case can also occur for branches. Always bear in mind that samples
+can be delayed by a few cycles from their real position. That's a hardware
+problem and OProfile can do nothing about it.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="kernel-profiling"></a>2. Kernel profiling</h2>
+              </div>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="irq-masking"></a>2.1. Interrupt masking</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+OProfile uses non-maskable interrupts (NMI) on the P6 generation, Pentium 4,
+Athlon and Duron processors. These interrupts can occur even in sections of the
+Linux kernel where interrupts are disabled, allowing collection of samples in virtually
+all executable code.  The RTC, timer interrupt mode, and Itanium 2 collection mechanisms
+use maskable interrupts. Thus, the RTC and Itanium 2 data collection mechanisms have "sample
+shadows", or blind spots: regions where no samples will be collected. Typically, the samples
+will be attributed to the code immediately after the interrupts are re-enabled.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="idle"></a>2.2. Idle time</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Your kernel is likely to support halting the processor when a CPU is idle. As
+the typical hardware events like <code class="constant">CPU_CLK_UNHALTED</code> do not
+count when the CPU is halted, the kernel profile will not reflect the actual
+amount of time spent idle. You can change this behaviour by booting with
+the <code class="option">idle=poll</code> option, which uses a different idle routine. This
+will appear as <code class="function">poll_idle()</code> in your kernel profile.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="kernel-modules"></a>2.3. Profiling kernel modules</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+OProfile profiles kernel modules by default. However, there are a couple of problems
+you may have when trying to get results. First, you may have booted via an initrd;
+this means that the actual path for the module binaries cannot be determined automatically.
+To get around this, you can use the <code class="option">-p</code> option to the profiling tools
+to specify where to look for the kernel modules.
+</p>
+            <p>
+In 2.6, the information on where kernel module binaries are located has been removed.
+This means OProfile needs guiding with the <code class="option">-p</code> option to find your
+modules. Normally, you can just use your standard module top-level directory for this.
+Note that due to this problem, OProfile cannot check that the modification times match;
+it is your responsibility to make sure you do not modify a binary after a profile
+has been created.
+</p>
+            <p>
+If you have run <span><strong class="command">insmod</strong></span> or <span><strong class="command">modprobe</strong></span> to insert a module
+in a particular directory, it is important that you specify this directory with the 
+<code class="option">-p</code> option first, so that it over-rides an older module binary that might
+exist in other directories you've specified with <code class="option">-p</code>. It is up to you
+to make sure that these values are correct: 2.6 kernels simply do not provide enough
+information for OProfile to get this information.
+</p>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="interpreting-callgraph"></a>3. Interpreting call-graph profiles</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Sometimes the results from call-graph profiles may be different to what
+you expect to see. The first thing to check is whether the target
+binaries were compiled with frame pointers enabled (if the binary was
+compiled using <span><strong class="command">gcc</strong></span>'s
+<code class="option">-fomit-frame-pointer</code> option, you will not get
+meaningful results). Note that as of this writing, the GCC developers
+plan to disable frame pointers by default. The Linux kernel is built
+without frame pointers by default; there is a configuration option you
+can use to turn it on under the "Kernel Hacking" menu.
+</p>
+          <p>
+Often you may see a caller of a function that does not actually directly
+call the function you're looking at (e.g. if <code class="function">a()</code>
+calls <code class="function">b()</code>, which in turn calls
+<code class="function">c()</code>, you may see an entry for
+<code class="function">a()-&gt;c()</code>).  What's actually occurring is that we
+are taking samples at the very start (or the very end) of
+<code class="function">c()</code>; at these few instructions, we haven't yet
+created the new function's frame, so it appears as if
+<code class="function">a()</code> is calling directly into
+<code class="function">c()</code>. Be careful not to be misled by these
+entries.
+</p>
+          <p>
+Like the rest of OProfile, call-graph profiling uses a statistical
+approach; this means that sometimes a backtrace sample is truncated, or
+even partially wrong. Bear this in mind when examining results.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="debug-info"></a>4. Inaccuracies in annotated source</h2>
+              </div>
+            </div>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="effect-of-optimizations"></a>4.1. Side effects of optimizations</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The compiler can introduce some pitfalls in the annotated source output.
+The optimizer can move pieces of code in such a manner that two lines of code
+are interlaced (instruction scheduling). Also debug info generated by the compiler 
+can show strange behavior. This is especially true for complex expressions e.g. inside
+an if statement:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+	if (a &amp;&amp; ..
+	    b &amp;&amp; ..
+	    c &amp;&amp;)
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Here the problem comes from the position of the line number. The available debug
+info does not give enough details for the if condition, so all samples are
+accumulated at the position of the right brace of the expression. Using
+<span><strong class="command">opannotate <code class="option">-a</code></strong></span> can help to show the real
+samples at an assembly level.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="prologues"></a>4.2. Prologues and epilogues</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+The compiler generally needs to generate "glue" code across function calls, dependent
+on the particular function call conventions used. Additionally other things
+need to happen, like stack pointer adjustment for the local variables; this
+code is known as the function prologue. Similar code is needed at function return,
+and is known as the function epilogue. This will show up in annotations as
+samples at the very start and end of a function, where there is no apparent
+executable code in the source.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="inlined-function"></a>4.3. Inlined functions</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+You may see that a function is credited with a certain number of samples, but
+the listing does not add up to the correct total. To pick a real example :
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+               :internal_sk_buff_alloc_security(struct sk_buff *skb)
+ 353 2.342%    :{ /* internal_sk_buff_alloc_security total: 1882 12.48% */
+               :
+               :        sk_buff_security_t *sksec;
+  15 0.0995%   :        int rc = 0;
+               :
+  10 0.06633%  :        sksec = skb-&gt;lsm_security;
+ 468 3.104%    :        if (sksec &amp;&amp; sksec-&gt;magic == DSI_MAGIC) {
+               :                goto out;
+               :        }
+               :
+               :        sksec = (sk_buff_security_t *) get_sk_buff_memory(skb);
+   3 0.0199%   :        if (!sksec) {
+  38 0.2521%   :                rc = -ENOMEM;
+               :                goto out;
+  10 0.06633%  :        }
+               :        memset(sksec, 0, sizeof (sk_buff_security_t));
+  44 0.2919%   :        sksec-&gt;magic = DSI_MAGIC;
+  32 0.2123%   :        sksec-&gt;skb = skb;
+  45 0.2985%   :        sksec-&gt;sid = DSI_SID_NORMAL;
+  31 0.2056%   :        skb-&gt;lsm_security = sksec;
+               :
+               :      out:
+               :
+ 146 0.9685%   :        return rc;
+               :
+  98 0.6501%   :}
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Here, the function is credited with 1,882 samples, but the annotations
+below do not account for this. This is usually because of inline functions -
+the compiler marks such code with debug entries for the inline function
+definition, and this is where <span><strong class="command">opannotate</strong></span> annotates
+such samples. In the case above, <code class="function">memset</code> is the most
+likely candidate for this problem. Examining the mixed source/assembly
+output can help identify such results.
+</p>
+            <p>
+When running <span><strong class="command">opannotate</strong></span>, you may get a warning
+"some functions compiled without debug information may have incorrect source line attributions".
+In some rare cases, OProfile is not able to verify that the derived source line
+is correct (when some parts of the binary image are compiled without debugging
+information). Be wary of results if this warning appears.
+</p>
+            <p>
+Furthermore, for some languages the compiler can implicitly generate functions,
+such as default copy constructors. Such functions are labelled by the compiler
+as having a line number of 0, which means the source annotation can be confusing.
+</p>
+          </div>
+          <div class="sect2" lang="en" xml:lang="en">
+            <div class="titlepage">
+              <div>
+                <div>
+                  <h3 class="title"><a id="wrong-linenr-info"></a>4.4. Inaccuracy in line number information</h3>
+                </div>
+              </div>
+            </div>
+            <p>
+Depending on your compiler you can fall into the following problem:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+struct big_object { int a[500]; };
+
+int main()
+{
+	big_object a, b;
+	for (int i = 0 ; i != 1000 * 1000; ++i)
+		b = a;
+	return 0;
+}
+
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+Compiled with <span><strong class="command">gcc</strong></span> 3.0.4 the annotated source is clearly inaccurate:
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+               :int main()
+               :{  /* main total: 7871 100% */
+               :        big_object a, b;
+               :        for (int i = 0 ; i != 1000 * 1000; ++i)
+               :                b = a;
+ 7871 100%     :        return 0;
+               :}
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+The problem here is distinct from the IRQ latency problem; the debug line number
+information is not precise enough; again, looking at output of <span><strong class="command">opannotate -as</strong></span> can help.
+</p>
+            <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+              <tr>
+                <td>
+                  <pre class="screen">
+               :int main()
+               :{
+               :        big_object a, b;
+               :        for (int i = 0 ; i != 1000 * 1000; ++i)
+               : 80484c0:       push   %ebp
+               : 80484c1:       mov    %esp,%ebp
+               : 80484c3:       sub    $0xfac,%esp
+               : 80484c9:       push   %edi
+               : 80484ca:       push   %esi
+               : 80484cb:       push   %ebx
+               :                b = a;
+               : 80484cc:       lea    0xfffff060(%ebp),%edx
+               : 80484d2:       lea    0xfffff830(%ebp),%eax
+               : 80484d8:       mov    $0xf423f,%ebx
+               : 80484dd:       lea    0x0(%esi),%esi
+               :        return 0;
+    3 0.03811% : 80484e0:       mov    %edx,%edi
+               : 80484e2:       mov    %eax,%esi
+    1 0.0127%  : 80484e4:       cld
+    8 0.1016%  : 80484e5:       mov    $0x1f4,%ecx
+ 7850 99.73%   : 80484ea:       repz movsl %ds:(%esi),%es:(%edi)
+    9 0.1143%  : 80484ec:       dec    %ebx
+               : 80484ed:       jns    80484e0
+               : 80484ef:       xor    %eax,%eax
+               : 80484f1:       pop    %ebx
+               : 80484f2:       pop    %esi
+               : 80484f3:       pop    %edi
+               : 80484f4:       leave
+               : 80484f5:       ret
+</pre>
+                </td>
+              </tr>
+            </table>
+            <p>
+So here it's clear that copying is correctly credited with all of the samples, but the
+line number information is misplaced. <span><strong class="command">objdump -dS</strong></span> exposes the
+same problem. Note that maintaining accurate debug information for compilers when optimizing is difficult, so this problem is not surprising.
+The problem of debug information
+accuracy is also dependent on the binutils version used; some BFD library versions
+contain a work-around for known problems of <span><strong class="command">gcc</strong></span>, some others do not. This is unfortunate but we must live with that,
+since profiling is pointless when you disable optimisation (which would give better debugging entries).
+</p>
+          </div>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="symbol-without-debug-info"></a>5. Assembly functions</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Often the assembler cannot generate debug information automatically.
+This means that you cannot get a source report unless 
+you manually define the necessary debug information; read your assembler documentation for how you might
+do that. The only
+debugging info needed currently by OProfile is the line-number/filename-VMA association. When profiling assembly
+without debugging info you can always get a report for symbols, and optionally for VMA, through <span><strong class="command">opreport -l</strong></span>
+or <span><strong class="command">opreport -d</strong></span>, but this works only for symbols with the right attributes.
+For <span><strong class="command">gas</strong></span> you can get this by
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+.globl foo
+	.type	foo,@function
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p> 
+whilst for <span><strong class="command">nasm</strong></span> you must use
+</p>
+          <table xmlns="" border="0" style="background: #E0E0E0;" width="90%">
+            <tr>
+              <td>
+                <pre class="screen">
+	  GLOBAL foo:function		; [1]
+</pre>
+              </td>
+            </tr>
+          </table>
+          <p>
+Note that OProfile does not need the global attribute, only the function attribute.
+</p>
+        </div>
+        <div class="sect1" lang="en" xml:lang="en">
+          <div class="titlepage">
+            <div>
+              <div>
+                <h2 class="title" style="clear: both"><a id="hidden-cost"></a>6. Other discrepancies</h2>
+              </div>
+            </div>
+          </div>
+          <p>
+Another cause of apparent problems is the hidden cost of instructions. A very
+common example is two memory reads: one from L1 cache and the other from memory:
+the second memory read is likely to have more samples.
+There are many other causes of hidden cost of instructions. A non-exhaustive
+list: mis-predicted branch, TLB cache miss, partial register stall,
+partial register dependencies, memory mismatch stall, re-executed µops. If you want to write
+programs at the assembly level, be sure to take a look at the Intel and
+AMD documentation at <a href="http://developer.intel.com/">http://developer.intel.com/</a>
+and <a href="http://www.amd.com/products/cpg/athlon/techdocs/">http://www.amd.com/products/cpg/athlon/techdocs/</a>.
+</p>
+        </div>
+      </div>
+      <div class="chapter" lang="en" xml:lang="en">
+        <div class="titlepage">
+          <div>
+            <div>
+              <h2 class="title"><a id="ack"></a>Chapter 6. Acknowledgments</h2>
+            </div>
+          </div>
+        </div>
+        <p>
+Thanks to (in no particular order) : Arjan van de Ven, Rik van Riel, Juan Quintela, Philippe Elie,
+Phillipp Rumpf, Tigran Aivazian, Alex Brown, Alisdair Rawsthorne, Bob Montgomery, Ray Bryant, H.J. Lu,
+Jeff Esper, Will Cohen, Graydon Hoare, Cliff Woolley, Alex Tsariounov, Al Stone, Jason Yeh,
+Randolph Chung, Anton Blanchard, Richard Henderson, Andries Brouwer, Bryan Rittmeyer,
+Maynard P. Johnson,
+Richard Reich (rreich@rdrtech.com), Zwane Mwaikambo, Dave Jones, Charles Filtness; and finally Pulp, for "Intro".
+</p>
+      </div>
+    </div>
+  </body>
+</html>
diff --git a/doc/oprofile.xml b/doc/oprofile.xml
new file mode 100644
index 0000000..3e0f296
--- /dev/null
+++ b/doc/oprofile.xml
@@ -0,0 +1,2367 @@
+<?xml version="1.0" encoding='ISO-8859-1'?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd">
+
+<book id="oprofile-guide">
+<bookinfo>
+	<title>OProfile manual</title>
+ 
+	<authorgroup>
+		<author>
+			<firstname>John</firstname>
+			<surname>Levon</surname>
+			<affiliation>
+				<address><email>levon@movementarian.org</email></address>
+			</affiliation>
+		</author>
+	</authorgroup>
+
+	<copyright>
+		<year>2000-2004</year>
+		<holder>Victoria University of Manchester, John Levon and others</holder>
+	</copyright>
+</bookinfo>
+
+<toc></toc>
+
+<chapter id="introduction">
+<title>Introduction</title>
+
+<para>
+This manual applies to OProfile version <oprofileversion />.
+OProfile is a profiling system for Linux 2.2/2.4/2.6 systems on a number of architectures. It is capable of profiling
+all parts of a running system, from the kernel (including modules and interrupt handlers) to shared libraries
+to binaries. It runs transparently in the background collecting information at a low overhead. These
+features make it ideal for profiling entire systems to determine bottlenecks in real-world systems.
+</para>
+<para>
+Many CPUs provide "performance counters", hardware registers that can count "events"; for example,
+cache misses, or CPU cycles. OProfile provides profiles of code based on the number of these occurring events:
+repeatedly, every time a certain (configurable) number of events has occurred, the PC value is recorded.
+This information is aggregated into profiles for each binary image.</para>
+<para>
+Some hardware setups do not allow OProfile to use performance counters: in these cases, no
+events are available, and OProfile operates in timer/RTC mode, as described in later chapters.
+</para>
+<sect1 id="applications">
+<title>Applications of OProfile</title>
+<para>
+OProfile is useful in a number of situations. You might want to use OProfile when you :
+</para>
+<itemizedlist>
+<listitem><para>need low overhead</para></listitem>
+<listitem><para>cannot use highly intrusive profiling methods</para></listitem>
+<listitem><para>need to profile interrupt handlers</para></listitem>
+<listitem><para>need to profile an application and its shared libraries</para></listitem>
+<listitem><para>need to capture the performance behaviour of an entire system</para></listitem>
+<listitem><para>want to examine hardware effects such as cache misses</para></listitem>
+<listitem><para>want detailed source annotation</para></listitem>
+<listitem><para>want instruction-level profiles</para></listitem>
+<listitem><para>want call-graph profiles</para></listitem>
+</itemizedlist>
+<para>
+OProfile is not a panacea. OProfile might not be a complete solution when you :
+</para>
+<itemizedlist>
+<listitem><para>require call graph profiles on platforms other than 2.6/x86</para></listitem>
+<listitem><para>don't have root permissions</para></listitem>
+<listitem><para>require 100% instruction-accurate profiles</para></listitem>
+<listitem><para>need function call counts or an interstitial profiling API</para></listitem>
+<listitem><para>cannot tolerate any disturbance to the system whatsoever</para></listitem>
+<listitem><para>need to profile interpreted or dynamically compiled code such as Java or Python</para></listitem>
+</itemizedlist>
+</sect1>
+
+<sect1 id="requirements">
+<title>System requirements</title>
+
+<variablelist>
+	<varlistentry>
+		<term>Linux kernel 2.2/2.4/2.6</term>
+		<listitem><para>
+			OProfile uses a kernel module that can be compiled for
+			2.2.11 or later and 2.4. Versions 2.4.10 or above are recommended, and required if you use the
+			boot-time kernel option <option>nosmp</option>.  2.6 kernels are supported with the in-kernel
+			OProfile driver. Note that only 32-bit x86 and IA64 are supported on 2.2/2.4 kernels.
+
+			</para>
+			<para>
+			PPC64 processors (Power4/Power5/PPC970) require a recent (&gt; 2.6.5) kernel with the line 
+			<constant>#define PV_970</constant> present in <filename>include/asm-ppc64/processor.h</filename>.
+<!-- FIXME: do we require always gte 2.4.10 for nosmp ? -->
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>modutils 2.4.6 or above</term>
+		<listitem><para>
+			You should have installed modutils 2.4.6 or higher (in fact earlier versions work well in almost all
+			cases).
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Supported architecture</term>
+		<listitem><para>
+			For Intel IA32, a CPU with either a P6 generation or Pentium 4 core is
+			required. In marketing terms this translates to anything
+			between an Intel Pentium Pro (not Pentium Classics) and
+			a Pentium 4 / Xeon, including all Celerons.  The AMD
+			Athlon, and Duron CPUs are also supported.  Other IA32
+			CPU types only support the RTC mode of OProfile; please
+			see later in this manual for details.  Hyper-threaded Pentium IVs
+			are not supported in 2.4. For 2.4 kernels, the Intel
+			IA-64 CPUs are also supported. For 2.6 kernels, there is additionally
+			support for Alpha processors, MIPS, ARM, x86-64, sparc64, ppc64, and,
+			in timer mode, PA-RISC and s390.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Uniprocessor or SMP</term>
+		<listitem><para>
+			SMP machines are fully supported.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Required libraries</term>
+		<listitem><para>
+			These libraries are required : <filename>popt</filename>, <filename>bfd</filename>,
+			<filename>liberty</filename> (debian users: libiberty is provided in binutils-dev package), <filename>dl</filename>,
+			plus the standard C++ libraries.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Bash version 2</term>
+		<listitem><para>
+			The <command>opcontrol</command> script requires bash version 2 at least to be installed
+			as <filename>/bin/bash</filename> or <filename>/bin/bash2</filename>
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>OProfile GUI</term>
+		<listitem><para>
+			The use of the GUI to start the profiler requires the <filename>Qt 2</filename> library. <filename>Qt 3</filename> should
+			also work.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+ 		<term><acronym>ELF</acronym></term>
+		<listitem><para>
+			Probably not too strenuous a requirement, but older <acronym>A.OUT</acronym> binaries/libraries are not supported.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>K&amp;R coding style</term>
+		<listitem><para>
+			OK, so it's not really a requirement, but I wish it was...
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+
+</sect1>
+
+<sect1 id="resources">
+<title>Internet resources</title>
+
+<variablelist>
+	<varlistentry>
+		<term>Web page</term>
+		<listitem><para>
+			There is a web page (which you may be reading now) at
+			<ulink url="http://oprofile.sf.net/">http://oprofile.sf.net/</ulink>.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Download</term>
+		<listitem><para>
+			You can download a source tarball or get anonymous CVS at the sourceforge page,
+			<ulink url="http://sf.net/projects/oprofile/">http://sf.net/projects/oprofile/</ulink>.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Mailing list</term>
+		<listitem><para>
+			There is a low-traffic OProfile-specific mailing list, details at
+			<ulink url="http://sf.net/mail/?group_id=16191">http://sf.net/mail/?group_id=16191</ulink>.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>Bug tracker</term>
+		<listitem><para>
+			There is a bug tracker for OProfile at SourceForge,
+			<ulink url="http://sf.net/tracker/?group_id=16191&amp;atid=116191">http://sf.net/tracker/?group_id=16191&amp;atid=116191</ulink>.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term>IRC channel</term>
+		<listitem><para>
+			Several OProfile developers and users sometimes hang out on channel <command>#oprofile</command>
+			on the <ulink url="http://freenode.info">freenode</ulink> network. 
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+</sect1>
+
+<sect1 id="install">
+<title>Installation</title>
+
+<para>
+First you need to build OProfile and install it. <command>./configure</command>, <command>make</command>, <command>make install</command>
+is often all you need, but note these arguments to <command>./configure</command> :
+</para>
+<variablelist>
+	<varlistentry>
+		<term><option>--with-linux</option></term>
+		<listitem><para>
+			Use this option to specify the location of the kernel source tree you wish
+			to compile against. The kernel module is built against this source and
+			will only work with a running kernel built from the same source with
+			the exact same options, so it is important you specify this option if you need
+			to.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--with-kernel-support</option></term>
+		<listitem><para>
+			Use this option with 2.6 and above kernels to indicate the 
+	    		kernel provides the OProfile device driver.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--with-qt-dir/includes/libraries</option></term>
+		<listitem><para>
+			Specify the location of Qt headers and libraries. It defaults to searching in
+			<constant>$QTDIR</constant> if these are not specified.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry id="enable-abi">
+		<term><option>--enable-abi</option></term>
+		<listitem><para>
+			Activate code within the OProfile sample collection daemon
+			<command>oprofiled</command> which records information about the binary
+			format of sample files in <filename>/var/lib/oprofile/abi</filename>, to
+			permit their transport between hosts using the
+			<command>opimport</command> utility. See <xref
+			linkend="opimport" />. This option is primarily intended for embedded
+			systems or remote analysis of production machines; if you will be
+			performing all sample analysis on the same machine as you are profiling,
+			it is safe to omit this option.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry id="disable-werror">
+		<term><option>--disable-werror</option></term>
+		<listitem><para>
+			Development versions of OProfile build by
+			default with <option>-Werror</option>. This option turns
+			<option>-Werror</option> off.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry id="disable-optimization">
+		<term><option>--disable-optimization</option></term>
+		<listitem><para>
+			Disable the <option>-O2</option> compiler flag
+			(useful if you discover an OProfile bug and want to give a useful
+			back-trace etc.)
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+<para>
+You'll need to have a configured kernel source for the current kernel
+to build the module for 2.4 kernels.  Since all distributions provide different kernels, it's unlikely that the running kernel matches the configured source
+you installed. The safest way is to recompile your own kernel, run it and compile oprofile. It is also recommended that if you have a
+uniprocessor machine, you enable the local APIC / IO_APIC support for
+your kernel (this is automatically enabled for SMP kernels). With many BIOSes, on a uniprocessor kernel of version 2.6.9 or later, it is not sufficient to enable the local APIC; you must also turn it on explicitly at boot time by passing the "lapic" option to the kernel. On
+machines with power management, such as laptops, the power management
+must be turned off when using OProfile with 2.4 kernels. The power management software
+in the BIOS cannot handle the non-maskable interrupts (NMIs) used by
+OProfile for data collection. If you use the NMI watchdog, be aware that
+the watchdog is disabled when profiling starts, and not re-enabled until the
+OProfile module is removed (or, in 2.6, when OProfile is not running). If you compile OProfile for
+a 2.2 kernel you must be root to compile the module. If you are using
+2.6 kernels or higher, you do not need kernel source, as long as the
+OProfile driver is enabled; additionally, you should not need to disable
+power management.
+</para>
+<para>
+Please note that you must save or have available the <filename>vmlinux</filename> file
+generated during a kernel compile, as OProfile needs it (you can use
+<option>--no-vmlinux</option>, but this will prevent kernel profiling).
+</para>
+
+</sect1>
+
+<sect1 id="uninstall">
+<title>Uninstalling OProfile</title>
+<para>
+You must have the source tree available to uninstall OProfile; a <command>make uninstall</command> will
+remove all installed files except your configuration file in the directory <filename>~/.oprofile</filename>.
+</para>
+</sect1>
+
+</chapter>
+
+<chapter id="overview"> 
+<title>Overview</title>
+
+<sect1 id="getting-started">
+<title>Getting started</title>
+<para>
+Before you can use OProfile, you must set it up. The minimum setup required for this
+is to tell OProfile where the <filename>vmlinux</filename> file corresponding to the
+running kernel is, for example :
+</para>
+<screen>opcontrol --vmlinux=/boot/vmlinux-`uname -r`</screen>
+<para>
+If you don't want to profile the kernel itself,
+you can tell OProfile you don't have a <filename>vmlinux</filename> file :
+</para>
+<screen>opcontrol --no-vmlinux</screen>
+<para>
+Now we are ready to start the daemon (<command>oprofiled</command>) which collects
+the profile data :
+</para>
+<screen>opcontrol --start</screen>
+<para>
+When you want to stop profiling, you can do so with :
+</para>
+<screen>opcontrol --shutdown</screen>
+<para>
+Note that unlike <command>gprof</command>, no instrumentation (<option>-pg</option>
+and <option>-a</option> options to <command>gcc</command>)
+is necessary.
+</para>
+<para>
+Periodically (or on <command>opcontrol --shutdown</command> or <command>opcontrol --dump</command>)
+the profile data is written out into the <filename>/var/lib/oprofile/samples</filename> directory.
+These profile files cover shared libraries, applications, the kernel (vmlinux), and kernel modules.
+You can clear the profile data (at any time) with <command>opcontrol --reset</command>.
+</para>
+<para>
+You can get summaries of this data in a number of ways at any time. To get a summary of
+data across the entire system for all of these profiles, you can do :
+</para>
+<screen>opreport</screen>
+<para>
+Or to get a more detailed summary, for a particular image, you can do something like :
+</para>
+<screen>opreport -l /boot/vmlinux-`uname -r`</screen>
+<para>
+There are also a number of other ways of presenting the data, as described later in this manual.
+Note that OProfile will choose a default profiling setup for you. However, there are a number
+of options you can pass to <command>opcontrol</command> if you need to change something,
+also detailed later.
+</para>
+
+</sect1>
+
+<sect1 id="tools-overview">
+<title>Tools summary</title>
+<para>
+This section gives a brief description of the available OProfile utilities and their purpose.
+</para>
+<variablelist>
+<varlistentry>
+	<term><filename>ophelp</filename></term>
+	<listitem><para>
+		This utility lists the available events and short descriptions.
+	</para></listitem>
+</varlistentry>
+	
+<varlistentry>
+	<term><filename>opcontrol</filename></term>
+	<listitem><para>
+		Used for controlling the OProfile data collection, discussed in <xref linkend="controlling" />.
+	</para></listitem>
+</varlistentry>
+
+<varlistentry>
+	<term><filename>opreport</filename></term>
+	<listitem><para>
+		This is the main tool for retrieving useful profile data, described in
+		<xref linkend="opreport" />.
+	</para></listitem>
+</varlistentry>
+
+<varlistentry>
+	<term><filename>opannotate</filename></term>
+	<listitem><para>
+		This utility can be used to produce annotated source, assembly or mixed source/assembly.
+		Source level annotation is available only if the application was compiled with 
+		debugging symbols. See <xref linkend="opannotate" />.
+	</para></listitem>
+</varlistentry>
+
+<varlistentry>
+	<term><filename>opgprof</filename></term>
+	<listitem><para>
+		This utility can output gprof-style data files for a binary, for use with
+		<command>gprof -p</command>. See <xref linkend="opgprof" />.
+	</para></listitem>
+</varlistentry>
+
+<varlistentry>
+	<term><filename>oparchive</filename></term>
+	<listitem><para>
+		This utility can be used to collect executables, debuginfo,
+		and sample files and copy the files into an archive.
+		The archive is self-contained and can be moved to another
+		machine for further analysis.
+		See <xref linkend="oparchive" />.
+	</para></listitem>
+</varlistentry>
+
+<varlistentry id="opimport">
+	<term><filename>opimport</filename></term>
+	<listitem><para>
+		This utility converts sample database files from a foreign binary format (abi) to
+		the native format. This is useful only when moving sample files between hosts,
+		for analysis on platforms other than the one used for collection. The abi format
+		of the file to be imported is described in a text file located in
+		<filename>/var/lib/oprofile/abi</filename>, if the <option>--enable-abi</option>
+		configure-time option was enabled. Furthermore, the <command>opimport</command>
+		tool is not built unless <option>--enable-abi</option> is given. See <xref
+		linkend="enable-abi" />.
+	</para></listitem>
+</varlistentry>
+
+</variablelist>
+</sect1>
+	
+</chapter>
+ 
+<chapter id="controlling">
+<title>Controlling the profiler</title>
+
+<sect1 id="controlling-daemon">
+<title>Using <command>opcontrol</command></title>
+<para>
+In this section we describe the configuration and control of the profiling system
+with opcontrol in more depth.
+The <command>opcontrol</command> script has a default setup, but you
+can alter this with the options given below. In particular,
+if your hardware supports performance counters, you can configure them.
+There are a number of counters (for example, counter 0 and counter 1
+on the Pentium III). Each of these counters can be programmed with
+an event to count, such as cache misses or MMX operations. The event
+chosen for each counter is reflected in the profile data collected
+by OProfile: functions and binaries at the top of the profiles reflect
+that most of the chosen events happened within that code.
+</para>
+<para>
+Additionally, each counter has a "count" value: this corresponds to how
+detailed the profile is. The lower the value, the more frequently profile
+samples are taken. A counter can choose to sample only kernel code, user-space code,
+or both (both is the default). Finally, some events have a "unit mask"
+- this is a value that further restricts the types of event that are counted. 
+The event types and unit masks for your CPU are listed by <command>opcontrol
+--list-events</command>.
+</para>
+<para>
+The <command>opcontrol</command> script provides the following actions :
+</para>
+<variablelist>
+	<varlistentry>
+		<term><option>--init</option></term>
+		<listitem><para>
+		Loads the OProfile module if required and makes the OProfile driver
+		interface available.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--setup</option></term>
+		<listitem><para>
+		    Followed by a list of arguments for profiling setup. The list of arguments
+		    is saved in <filename>/root/.oprofile/daemonrc</filename>.
+		    Giving this option is not necessary; you can just directly pass one
+		    of the setup options, e.g. <command>opcontrol --no-vmlinux</command>.
+		  </para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--status</option></term>
+		<listitem><para>
+		Show configuration information.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--start-daemon</option></term>
+		<listitem><para>
+		    Start the oprofile daemon without starting actual profiling. The profiling
+		can then be started using <option>--start</option>. This is useful for avoiding
+		measuring the cost of daemon startup, as <option>--start</option> is a simple
+		write to a file in oprofilefs. Not available in 2.2/2.4 kernels.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--start</option></term>
+		<listitem><para>
+		    Start data collection with either arguments provided by <option>--setup</option>
+		or information saved in <filename>/root/.oprofile/daemonrc</filename>. Specifying
+		the additional <option>--verbose</option> option makes the daemon generate lots of debug data
+		whilst it is running.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--dump</option></term>
+		<listitem><para>
+		    Force a flush of the collected profiling data to the daemon.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--stop</option></term>
+		<listitem><para>
+		    Stop data collection (this separate step is not possible with 2.2 or 2.4 kernels).
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--shutdown</option></term>
+		<listitem><para>
+		    Stop data collection and kill the daemon.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--reset</option></term>
+		<listitem><para>
+		    Clears out data from current session, but leaves saved sessions.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--save=</option>session_name</term>
+		<listitem><para>
+		    Save data from current session to session_name.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--deinit</option></term>
+		<listitem><para>
+                Shuts down the daemon, and unloads the OProfile module and oprofilefs.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--list-events</option></term>
+		<listitem><para>
+		    List event types and unit masks.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--help</option></term>
+		<listitem><para>
+		    Generate usage messages.
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+<para>
+There are a number of possible settings, of which, only
+<option>--vmlinux</option> (or <option>--no-vmlinux</option>)
+is required. These settings are stored in <filename>~/.oprofile/daemonrc</filename>.
+</para>
+<variablelist>
+	<varlistentry>
+		<term><option>--buffer-size=</option>num</term>
+		<listitem><para>
+		Number of samples in kernel buffer.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--cpu-buffer-size=</option>num</term>
+		<listitem><para>
+		Number of samples in kernel per-cpu buffer (2.6 only). If you
+		profile at a high rate, it can help to increase this if the log
+		file shows an excessive count of samples lost due to CPU buffer overflow.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--event=</option>[eventspec]</term>
+		<listitem><para>
+		Use the given performance counter event to profile.
+		See <xref linkend="eventspec" /> below.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--separate=</option>[none,lib,kernel,thread,cpu,all]</term>
+		<listitem><para>
+		By default, every profile is stored in a single file. Thus, for example,
+		samples in the C library are all accredited to the <filename>/lib/libc.o</filename>
+		profile. However, you can choose to create separate sample files by specifying
+		one of the below options.
+		</para>
+		<informaltable frame="all">
+		<tgroup cols='2'> 
+		<tbody>
+		<row><entry><option>none</option></entry><entry>No profile separation (default)</entry></row>
+		<row><entry><option>lib</option></entry><entry>Create per-application profiles for libraries</entry></row>
+		<row><entry><option>kernel</option></entry><entry>Create per-application profiles for the kernel and kernel modules</entry></row>
+		<row><entry><option>thread</option></entry><entry>Create profiles for each thread and each task</entry></row>
+		<row><entry><option>cpu</option></entry><entry>Create profiles for each CPU</entry></row>
+		<row><entry><option>all</option></entry><entry>All of the above options</entry></row>
+		</tbody>
+		</tgroup>
+		</informaltable>
+		<para>
+		Note  that <option>--separate=kernel</option> also turns on <option>--separate=lib</option>.
+		<!-- FIXME: update if this change -->
+		When using <option>--separate=kernel</option>, samples in hardware interrupts, soft-irqs, or other
+		asynchronous kernel contexts are credited to the task currently running. This means you will see
+		seemingly nonsense profiles such as <filename>/bin/bash</filename> showing samples for the PPP modules,
+		etc.
+		</para>
+		<para>
+		On 2.2/2.4 only kernel threads already started when profiling begins are correctly profiled;
+		newly started kernel thread samples are credited to the vmlinux (kernel) profile.
+		</para>
+		<para>
+		Using <option>--separate=thread</option> creates a lot
+		of sample files if you leave OProfile running for a while; it's most
+		useful when used for short sessions, or when using image filtering.
+		</para>
+		</listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--callgraph=</option>#depth</term>
+		<listitem><para>
+		Enable call-graph sample collection with a maximum depth. Use 0 to disable
+		callgraph profiling. Currently this requires a recent
+		2.6 kernel, and x86.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--image=</option>image,[images]|"all"</term>
+		<listitem><para>
+		Image filtering. If you specify one or more absolute
+		paths to binaries, OProfile will only produce profile results for those
+		binary images. This is useful for restricting the sometimes voluminous
+		output you may get otherwise, especially with
+		<option>--separate=thread</option>. Note that if you are using
+		<option>--separate=lib</option> or
+		<option>--separate=kernel</option>, then if you specify an
+		application binary, the shared libraries and kernel code
+		<emphasis>are</emphasis> included. Specify the value
+		"all" to profile everything (the default).
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--vmlinux=</option>file</term>
+		<listitem><para>
+		vmlinux kernel image.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>--no-vmlinux</option></term>
+		<listitem><para>
+		Use this when you don't have a kernel vmlinux file, and you don't want
+		to profile the kernel. This still counts the total number of kernel samples,
+		but can't give symbol-based results for the kernel or any modules.
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+<sect2 id="opcontrolexamples">
+<title>Examples</title>
+
+<sect3 id="examplesperfctr">
+<title>Intel performance counter setup</title>
+<para>
+Here, we have a Pentium III running at 800MHz, and we want to look at where data memory
+references are happening most, and also get results for CPU time.
+</para>
+<screen>
+# opcontrol --event=CPU_CLK_UNHALTED:400000 --event=DATA_MEM_REFS:10000
+# opcontrol --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start
+</screen>
+</sect3>
+
+<sect3 id="examplesrtc">
+<title>RTC mode</title>
+<para>
+Here, we have an Intel laptop without support for performance counters, running on 2.4 kernels.
+</para>
+<screen>
+# ophelp -r
+CPU with RTC device
+# opcontrol --vmlinux=/boot/2.4.13/vmlinux --event=RTC_INTERRUPTS:1024
+# opcontrol --start
+</screen>
+</sect3>
+
+<sect3 id="examplesstartdaemon">
+<title>Starting the daemon separately</title>
+<para>
+If we're running 2.6 kernels, we can use <option>--start-daemon</option> to avoid
+the profiler startup affecting results.
+</para>
+<screen>
+# opcontrol --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start-daemon
+# my_favourite_benchmark --init
+# opcontrol --start ; my_favourite_benchmark --run ; opcontrol --stop
+</screen>
+</sect3>
+
+<sect3 id="exampleseparate">
+<title>Separate profiles for libraries and the kernel</title>
+<para>
+Here, we want to see a profile of the OProfile daemon itself, including when
+it was running inside the kernel driver, and its use of shared libraries.
+</para>
+<screen>
+# opcontrol --separate=kernel --vmlinux=/boot/2.6.0/vmlinux
+# opcontrol --start
+# my_favourite_stress_test --run
+# opreport -l -p /lib/modules/2.6.0/kernel /usr/local/bin/oprofiled
+</screen>
+</sect3>
+
+<sect3 id="examplessessions">
+<title>Profiling sessions</title>
+<para>
+It can often be useful to split up profiling data into several different
+time periods. For example, you may want to collect data on an application's
+startup separately from the normal runtime data. You can use the simple
+command <command>opcontrol --save</command> to do this. For example :
+</para>
+<screen>
+# opcontrol --save=blah
+</screen>
+<para>
+will create a sub-directory in <filename>/var/lib/oprofile/samples</filename> containing the samples
+up to that point (the current session's sample files are moved into this
+directory). You can then pass this session name as a parameter to the post-profiling
+analysis tools, to only get data up to the point you named the
+session. If you do not want to save a session, you can do
+<command>rm -rf /var/lib/oprofile/samples/sessionname</command> or, for the
+current session, <command>opcontrol --reset</command>.
+</para>
+</sect3>
+</sect2> 
+
+<sect2 id="eventspec">
+<title>Specifying performance counter events</title>
+<para>
+The <option>--event</option> option to <command>opcontrol</command>
+takes a specification that indicates how the details of each
+hardware performance counter should be set up. If you want to
+revert to OProfile's default setting (<option>--event</option>
+is strictly optional), use <option>--event=default</option>.
+</para>
+<para>
+You can pass multiple event specifications. OProfile will allocate
+hardware counters as necessary. Note that some combinations are not
+allowed by the CPU; running <command>opcontrol --list-events</command> gives the details
+of each event. The event specification is a colon-separated string
+of the form <option><emphasis>name</emphasis>:<emphasis>count</emphasis>:<emphasis>unitmask</emphasis>:<emphasis>kernel</emphasis>:<emphasis>user</emphasis></option> as described in this table:
+<note><para>
+For the PowerPC platforms, all events specified must be in the same group; i.e., the group number
+appended to the event name (e.g. <constant>&lt;<emphasis>some-event-name</emphasis>&gt;_GRP9</constant>) must be the same.
+</para></note>
+</para>
+<informaltable frame="all">
+<tgroup cols='2'> 
+<tbody>
+<row><entry><option>name</option></entry><entry>The symbolic event name, e.g. <constant>CPU_CLK_UNHALTED</constant></entry></row>
+<row><entry><option>count</option></entry><entry>The counter reset value, e.g. 100000</entry></row>
+<row><entry><option>unitmask</option></entry><entry>The unit mask, as given in the events list, e.g. 0x0f</entry></row>
+<row><entry><option>kernel</option></entry><entry>Whether to profile kernel code</entry></row>
+<row><entry><option>user</option></entry><entry>Whether to profile userspace code</entry></row>
+</tbody>
+</tgroup>
+</informaltable>
+<para>
+The last three values are optional; if you omit them (e.g. <option>--event=DATA_MEM_REFS:30000</option>),
+they will be set to the default values (a unit mask of 0, and profiling both kernel and
+userspace code). Note that some events require a unit mask.
+</para>
+<para>
+If OProfile is using RTC mode, and you want to alter the default counter value,
+you can use something like <option>--event=RTC_INTERRUPTS:2048</option>. Note the last
+three values here are ignored.
+If OProfile is using timer-interrupt mode, there is no configuration possible.
+</para>
+<para>
+The table below lists the events selected by default
+(<option>--event=default</option>) for the various computer architectures:
+</para>
+<informaltable frame="all">
+<tgroup cols='3'> 
+<tbody>
+<row><entry>Processor</entry><entry>cpu_type</entry><entry>Default event</entry></row>
+<row><entry>Alpha EV4</entry><entry>alpha/ev4</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>Alpha EV5</entry><entry>alpha/ev5</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>Alpha PCA56</entry><entry>alpha/pca56</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>Alpha EV6</entry><entry>alpha/ev6</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>Alpha EV67</entry><entry>alpha/ev67</entry><entry>CYCLES:100000:0:1:1</entry></row>
+<row><entry>ARM/XScale PMU1</entry><entry>arm/xscale1</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>ARM/XScale PMU2</entry><entry>arm/xscale2</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>Athlon</entry><entry>i386/athlon</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium Pro</entry><entry>i386/ppro</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium II</entry><entry>i386/pii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium III</entry><entry>i386/piii</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium M (P6 core)</entry><entry>i386/p6_mobile</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Pentium 4 (non-HT)</entry><entry>i386/p4</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
+<row><entry>Pentium 4 (HT)</entry><entry>i386/p4-ht</entry><entry>GLOBAL_POWER_EVENTS:100000:1:1:1</entry></row>
+<row><entry>Hammer</entry><entry>x86-64/hammer</entry><entry>CPU_CLK_UNHALTED:100000:0:1:1</entry></row>
+<row><entry>Itanium</entry><entry>ia64/itanium</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>Itanium 2</entry><entry>ia64/itanium2</entry><entry>CPU_CYCLES:100000:0:1:1</entry></row>
+<row><entry>TIMER_INT</entry><entry>timer</entry><entry>None selectable</entry></row>
+<row><entry>IBM iseries</entry><entry>PowerPC 4/5/970</entry><entry>CYCLES:10000:0:1:1</entry></row>
+<row><entry>IBM pseries</entry><entry>PowerPC 4/5/970</entry><entry>CYCLES:10000:0:1:1</entry></row>
+<row><entry>IBM s390</entry><entry>timer</entry><entry>None selectable</entry></row>
+<row><entry>IBM s390x</entry><entry>timer</entry><entry>None selectable</entry></row>
+</tbody>
+</tgroup>
+</informaltable>
+
+</sect2>
+
+</sect1>
+ 
+<sect1 id="oprofile-gui">
+<title>Using <command>oprof_start</command></title>
+<para>
+The <command>oprof_start</command> application provides a convenient way to start the profiler.
+Note that <command>oprof_start</command> is just a wrapper around the <command>opcontrol</command> script,
+so it does not provide more services than the script itself.
+</para>
+<para>
+After <command>oprof_start</command> is started you can select the event type for each counter;
+the sampling rate and other related parameters are explained in <xref linkend="controlling-daemon" />.
+The "Configuration" section allows you to set general parameters such as the buffer size, kernel filename
+etc. The counter setup interface should be self-explanatory; <xref linkend="hardware-counters" /> and related 
+links contain information on using unit masks.
+</para>
+<para>
+A status line shows the current status of the profiler: how long it has been running, and the average
+number of interrupts received per second and the total, over all processors.
+Note that quitting <command>oprof_start</command> does not stop the profiler.
+</para>
+<para>
+Your configuration is saved in the same file as <command>opcontrol</command> uses; that is,
+<filename>~/.oprofile/daemonrc</filename>.
+</para>
+
+</sect1>
+
+<sect1 id="detailed-parameters">
+<title>Configuration details</title>
+
+<sect2 id="hardware-counters">
+<title>Hardware performance counters</title>
+<note>
+<para>
+Your CPU type may not include the requisite support for hardware performance counters, in which case
+you must use OProfile in RTC mode in 2.4 (see <xref linkend="rtc" />), or timer mode in 2.6 (see <xref linkend="timer" />). 
+You do not really need to read this section unless you are interested in using 
+events other than the default event chosen by OProfile.
+</para>
+</note>
+<para>
+The Intel hardware performance counters are detailed in the Intel IA-32 Architecture Manual, Volume 3, available
+from <ulink url="http://developer.intel.com/">http://developer.intel.com/</ulink>. The AMD Athlon/Duron
+implementation is detailed in <ulink
+url="http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf">
+http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf</ulink>.
+For PowerPC64 processors in IBM iSeries and pSeries systems, processor documentation
+is available at <ulink url="http://www-306.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC/">
+http://www-306.ibm.com/chips/techlib/techlib.nsf/productfamilies/PowerPC</ulink>.  (For example, the
+specific publication containing information on the performance monitor unit for the PowerPC970 is 
+"IBM PowerPC 970FX RISC Microprocessor User's Manual.")
+These processors are capable of delivering an interrupt when a counter overflows.
+This is the basic mechanism on which OProfile is based. The delivery mode is <acronym>NMI</acronym>,
+so blocking interrupts in the kernel does not prevent profiling. When the interrupt handler is called,
+the current <acronym>PC</acronym> value and the current task are recorded into the profiling structure.
+This allows the overflow event to be attached to a specific assembly instruction in a binary image.
+The daemon receives this data from the kernel, and writes it to the sample files.
+</para>
+<para>
+If we use an event such as <constant>CPU_CLK_UNHALTED</constant> or <constant>INST_RETIRED</constant>
+(<constant>GLOBAL_POWER_EVENTS</constant> or <constant>INSTR_RETIRED</constant>, respectively, on the Pentium 4), we can
+use the overflow counts as an estimate of actual time spent in each part of code. Alternatively we can profile interesting
+data such as the cache behaviour of routines with the other available counters.
+</para>
+<para>
+However there are several caveats. First, there are those issues listed in the Intel manual. There is a delay
+between the counter overflow and the interrupt delivery that can skew results on a small scale - this means
+you cannot rely on the profiles at the instruction level as being perfectly accurate.
+If you are using an "event-mode" counter such as the cache counters, a count registered against an instruction doesn't mean
+that the instruction is responsible for that event. However, it implies that the counter overflowed in the dynamic
+vicinity of that instruction, to within a few instructions. Further details on this problem can be found in 
+<xref linkend="interpreting" /> and also in the Digital paper "ProfileMe: A Hardware Performance Counter".
+</para>
+<para>
+Each counter has several configuration parameters.
+First, there is the unit mask: this simply further specifies what to count.
+Second, there is the counter value, discussed below. Third, there is a parameter controlling whether to increment counts
+whilst in kernel or user space. You can configure these separately for each counter.
+</para>
+<para>
+The counter value sets the sampling interval: after each overflow event, the counter will be re-initialized
+such that another overflow will occur after that many events have been counted. Thus, higher
+values mean less-detailed profiling, and lower values mean more detail, but higher overhead.
+Picking a good value for this
+parameter is, unfortunately, somewhat of a black art. It is of course dependent on the event
+you have chosen.
+Specifying too large a value will mean not enough interrupts are generated
+to give a realistic profile (though this problem can be ameliorated by profiling for <emphasis>longer</emphasis>).
+Specifying too small a value can lead to higher performance overhead.
+</para>
+
+</sect2>
+
+<sect2 id="rtc">
+<title>OProfile in RTC mode</title>
+<note><para>
+This section applies to 2.2/2.4 kernels only.
+</para></note>
+<para>
+Some CPU types do not provide the needed hardware support to use the hardware performance counters. This includes
+some laptops, classic Pentiums, and other CPU types not yet supported by OProfile (such as Cyrix). 
+On these machines, OProfile falls
+back to using the real-time clock interrupt to collect samples. This interrupt is also used by the <command>rtc</command>
+module: you cannot have both the OProfile and rtc modules loaded, nor use OProfile with rtc support compiled into the kernel.
+</para>
+<para>
+RTC mode is less capable than the hardware counters mode; in particular, it is unable to profile sections of
+the kernel where interrupts are disabled. There is just one available event, "RTC interrupts", and its value 
+corresponds to the number of interrupts generated per second (that is, a higher number means a better profiling
+resolution, and higher overhead). The current implementation of the real-time clock supports only power-of-two
+sampling rates from 2 to 4096 per second.  Other values within this range are rounded to the nearest power of
+two.
+</para>
+<para>
+Setting the value from the GUI should be straightforward. On the command line, you need to specify the
+event to <command>opcontrol</command>, e.g. :
+</para>
+<para><command>opcontrol --event=RTC_INTERRUPTS:256</command></para>
+</sect2>
+
+<sect2 id="timer">
+<title>OProfile in timer interrupt mode</title>
+<note><para>
+This section applies to 2.6 kernels and above only.
+</para></note>
+<para>
+In 2.6 kernels on CPUs without OProfile support for the hardware performance counters, the driver
+falls back to using the timer interrupt for profiling. Like the RTC mode in 2.4 kernels, this is not able to
+profile code that has interrupts disabled. Note that there are no configuration parameters for
+setting this, unlike the RTC and hardware performance counter setup.
+</para>
+<para>
+You can force use of the timer interrupt by using the <option>timer=1</option> module
+parameter (or <option>oprofile.timer=1</option> on the boot command line if OProfile is
+built-in).
+</para>
+</sect2>
+
+<sect2 id="p4">
+<title>Pentium 4 support</title>
+<para>
+The Pentium 4 / Xeon performance counters are organized around 3 types of model specific registers (MSRs): 45 event
+selection control registers (ESCRs), 18 counter configuration control registers (CCCRs) and 18 counters. ESCRs describe a
+particular set of events which are to be recorded, and CCCRs bind ESCRs to counters and configure their
+operation. Unfortunately the relationship between these registers is quite complex; they cannot all be used with one
+another at any time. There is, however, a subset of 8 counters, 8 ESCRs, and 8 CCCRs which can be used independently of
+one another, so OProfile only accesses those registers, treating them as a bank of 8 "normal" counters, similar
+to those in the P6 or Athlon families of CPU.
+</para>
+<para>
+There is currently no support for Precision Event-Based Sampling (PEBS), nor any advanced uses of the Debug Store
+(DS). Current support is limited to the conservative extension of OProfile's existing interrupt-based model described
+above.  Performance monitoring hardware on Pentium 4 / Xeon processors with Hyperthreading enabled (multiple logical
+processors on a single die) is not supported in 2.4 kernels (you can use OProfile if you disable hyper-threading,
+though).
+</para>
+</sect2>
+
+<sect2 id="ia64">
+<title>Intel Itanium 2 support</title>
+<para>
+The Itanium 2 performance monitoring unit (PMU) organizes the counters as four
+pairs of performance event monitoring registers. Each pair is composed of a
+Performance Monitoring Configuration (PMC) register and Performance Monitoring
+Data (PMD) register.  The PMC selects the performance event being monitored and
+the PMD determines the sampling interval. The IA64 Performance Monitoring Unit
+(PMU) triggers sampling with maskable interrupts. Thus, samples will not occur
+in sections of the IA64 kernel where interrupts are disabled.
+</para>
+<para>
+None of the advanced features of the Itanium 2 performance monitoring unit
+such as opcode matching, address range matching, or precise event sampling are
+supported by this version of OProfile.  The Itanium 2 support only maps OProfile's
+existing interrupt-based model to the PMU hardware.
+</para>
+</sect2>
+
+<sect2 id="ppc64">
+<title>PowerPC64 support</title>
+<para>
+The performance monitoring unit (PMU) for the PowerPC 64-bit processors 
+consists of between 6 and 8 counters (depending on the model), plus three
+special purpose registers used for programming the counters -- MMCR0, MMCR1,
+and MMCRA.  Advanced features such as instruction matching and thresholding are
+not supported by this version of OProfile.
+</para>
+</sect2>
+
+<sect2 id="misuse">
+<title>Dangerous counter settings</title>
+<para>
+OProfile is a low-level profiler which allows continuous profiling with a low overhead cost.
+If too low a count reset value is set for a counter, the system can become overloaded with counter
+interrupts, and seem as if the system has frozen. Whilst some validation is done, it
+is not foolproof.
+</para>
+<note><para>
+This can happen as follows: When the profiler count
+reaches zero an NMI handler is called which stores the sample values in an internal buffer, then resets the counter
+to its original value. If the count is very low, a pending NMI can be sent before the NMI handler has
+completed. Due to the priority of the NMI, the local APIC delivers the pending interrupt immediately after
+completion of the previous interrupt handler, and control never returns to other parts of the system.
+In this way the system seems to be frozen.
+</para></note>
+<para>If this happens, it will be impossible to bring the system back to a workable state.
+There is no way to provide real security against this happening, other than making sure to use a reasonable value
+for the counter reset. For example, setting <constant>CPU_CLK_UNHALTED</constant> event type with a ridiculously low reset count (e.g. 500)
+is likely to freeze the system.
+</para>
+<para>
+In short : <command>Don't try a foolish sample count value</command>. Unfortunately the definition of a foolish value
+is really dependent on the event type - if ever in doubt, e-mail </para>
+<address><email>oprofile-list@lists.sf.net</email>.</address>
+</sect2>
+
+</sect1>
+ 
+</chapter>
+
+<chapter id="results">
+<title>Obtaining results</title>
+<para>
+OK, so the profiler has been running, but it's not much use unless we can get some data out. Fairly often,
+OProfile does a little <emphasis>too</emphasis> good a job of keeping overhead low, and no data reaches
+the profiler. This can happen on lightly-loaded machines. Remember you can force a dump at any time with :
+</para>
+<para><command>opcontrol --dump</command></para>
+<para>Remember to do this before complaining there is no profiling data !
+Now that we've got some data, it has to be processed. That's the job of <command>opreport</command>,
+<command>opannotate</command>, or <command>opgprof</command>.
+</para>
+
+<sect1 id="profile-spec">
+<title>Profile specifications</title>
+
+<para>
+All of the analysis tools take a <emphasis>profile specification</emphasis>.
+This is a set of definitions that describe which actual profiles should be
+examined. The simplest profile specification is empty: this will match all
+the available profile files for the current session (this is what happens
+when you do <command>opreport</command>).
+</para>
+<para>
+Specification parameters are of the form <option>name:value[,value]</option>.
+For example, if I wanted to get a combined symbol summary for
+<filename>/bin/myprog</filename> and <filename>/bin/myprog2</filename>,
+I could do <command>opreport -l image:/bin/myprog,/bin/myprog2</command>.
+As a special case, you don't actually need to specify the <option>image:</option>
+part here: anything left on the command line is assumed to be an
+<option>image:</option> name. Similarly, if no <option>session:</option>
+is specified, then <option>session:current</option> is assumed ("current"
+is a special name of the current / last profiling session).
+</para>
+<para>
+In addition to the comma-separated list shown above, some of the 
+specification parameters can take <command>glob</command>-style
+values. For example, if I want to see image summaries for all
+binaries profiled in <filename>/usr/bin/</filename>, I could do
+<command>opreport image:/usr/bin/\*</command>. Note the necessity
+to escape the special character from the shell.
+</para>
+<para>
+For <command>opreport</command>, profile specifications can be used to
+define two profiles, giving differential output. This is done by
+enclosing each of the two specifications within curly braces, as shown
+in the examples below. Any specifications outside of curly braces are
+shared across both.
+</para>
+
+<sect2 id="profile-spec-examples">
+<title>Examples</title>
+
+<para>
+Image summaries for all profiles with <constant>DATA_MEM_REFS</constant>
+samples in the saved session called "stresstest" :
+</para>
+<screen>
+# opreport session:stresstest event:DATA_MEM_REFS
+</screen>
+
+<para>
+Symbol summary for the application called "test_sym53c8xx,9xx". Note the
+escaping is necessary as <option>image:</option> takes a comma-separated list.
+</para>
+<screen>
+# opreport -l ./test/test_sym53c8xx\,9xx
+</screen>
+
+<para>
+Image summaries for all binaries in the <filename>test</filename> directory,
+excepting <filename>boring-test</filename> :
+</para>
+<screen>
+# opreport image:./test/\* image-exclude:./test/boring-test
+</screen>
+
+<para>
+Differential profile of a binary stored in two archives :
+</para>
+<screen>
+# opreport -l /bin/bash { archive:./orig } { archive:./new }
+</screen>
+
+<para>
+Differential profile of an archived binary with the current session :
+</para>
+<screen>
+# opreport -l /bin/bash { archive:./orig } { }
+</screen>
+
+</sect2> <!-- profile spec examples -->
+
+<sect2 id="profile-spec-details">
+<title>Profile specification parameters</title>
+
+<variablelist>
+	<varlistentry>
+		<term><option>archive:</option><emphasis>archivepath</emphasis></term>
+		<listitem><para>
+		A path to an archive made with <command>oparchive</command>.
+		Absence of this tag, unlike others, means "the current system",
+		equivalent to specifying "archive:".
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>session:</option><emphasis>sessionlist</emphasis></term>
+		<listitem><para>
+		A comma-separated list of session names to resolve in. Absence of this
+		tag, unlike others, means "the current session", equivalent to
+		specifying "session:current".
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>session-exclude:</option><emphasis>sessionlist</emphasis></term>
+		<listitem><para>
+                A comma-separated list of sessions to exclude.
+		</para></listitem>
+	</varlistentry>
+	<varlistentry>
+		<term><option>image:</option><emphasis>imagelist</emphasis></term>
+		<listitem><para>
+                A comma-separated list of image names to resolve. Each entry may be a relative
+                path, a <command>glob</command>-style name, or a full path, e.g.</para>
+		<screen>opreport 'image:/usr/bin/oprofiled,*op*,./opreport'</screen>
+		</listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>image-exclude:</option><emphasis>imagelist</emphasis></term>
+		<listitem><para>
+		Same as <option>image:</option>, but the matching images are excluded.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>lib-image:</option><emphasis>imagelist</emphasis></term>
+		<listitem><para>
+		Same as <option>image:</option>, but only for images that are for
+		a particular primary binary image (namely, an application). This only
+		makes sense to use if you're using <option>--separate</option>.
+		This includes kernel modules and the kernel when using
+		<option>--separate=kernel</option>.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>lib-image-exclude:</option><emphasis>imagelist</emphasis></term>
+		<listitem><para>
+		Same as <option>lib-image:</option>, but the matching images
+		are excluded.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>event:</option><emphasis>eventlist</emphasis></term>
+		<listitem><para>
+		The symbolic event name to match on, e.g. <option>event:DATA_MEM_REFS</option>.
+		You can pass a list of events for side-by-side comparison with <command>opreport</command>.
+		When using the timer interrupt, the event is always "TIMER".
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>count:</option><emphasis>eventcountlist</emphasis></term>
+		<listitem><para>
+		The event count to match on, e.g. <option>event:DATA_MEM_REFS count:30000</option>.
+		Note that this value refers to the setting used for <command>opcontrol</command>
+		only, and has nothing to do with the sample counts in the profile data
+		itself.
+		You can pass a list of counts for side-by-side comparison with <command>opreport</command>.
+		When using the timer interrupt, the count is always 0 (indicating it cannot be set).
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>unit-mask:</option><emphasis>masklist</emphasis></term>
+		<listitem><para>
+		The unit mask value of the event to match on, e.g. <option>unit-mask:1</option>.
+		You can pass a list of unit masks for side-by-side comparison with <command>opreport</command>.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>cpu:</option><emphasis>cpulist</emphasis></term>
+		<listitem><para>
+		Only consider profiles for the given numbered CPU (starting from zero).
+		This is only useful when using CPU profile separation.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>tgid:</option><emphasis>pidlist</emphasis></term>
+		<listitem><para>
+		Only consider profiles for the given task groups. Unless some program
+		is using threads, the task group ID of a process is the same
+		as its process ID. This option corresponds to the POSIX
+		notion of a thread group.
+		This is only useful when using per-process profile separation.
+		</para></listitem>
+	</varlistentry>
+
+	<varlistentry>
+		<term><option>tid:</option><emphasis>tidlist</emphasis></term>
+		<listitem><para>
+		Only consider profiles for the given threads. When using
+		recent thread libraries, all threads in a process share the
+		same task group ID, but have different thread IDs. You can
+		use this option in combination with <option>tgid:</option> to
+		restrict the results to particular threads within a process.
+		This is only useful when using per-process profile separation.
+		</para></listitem>
+	</varlistentry>
+</variablelist>
+
+</sect2>
+
+<sect2>
+<title>Locating and managing binary images</title>
+<para>
+Each session's sample files can be found in the <filename>/var/lib/oprofile/samples/</filename> directory.
+These are used, along with the binary image files, to produce human-readable data.
+In some circumstances (kernel modules in an initrd, or modules on 2.6 kernels), OProfile
+will not be able to find the binary images. All the tools have an <option>--image-path</option>
+option to which you can pass a comma-separated list of alternate paths to search. For example,
+I can let OProfile find my 2.6 modules by using <command>--image-path /lib/modules/2.6.0/kernel/</command>.
+It is your responsibility to ensure that the correct images are found when using this
+option.
+</para>
+<para>
+Note that if a binary image changes after the sample file was created, you won't be able to get useful
+symbol-based data out. This situation is detected for you. If you replace a binary, you should
+make sure to save the old binary if you need to do comparative profiles.
+</para>
+
+</sect2>
+
+<sect2 id="no-results">
+<title>What to do when you don't get any results</title>
+<para>
+When attempting to get output, you may see the error :
+</para>
+<screen>
+error: no sample files found: profile specification too strict ?
+</screen>
+<para>
+What this is saying is that the profile specification you passed in,
+when matched against the available sample files, resulted in no matches.
+There are a number of reasons this might happen:
+</para>
+<variablelist>
+<varlistentry><term>spelling</term><listitem><para>
+You specified a binary name, but spelt it wrongly. Check your spelling !
+</para></listitem></varlistentry>
+<varlistentry><term>profiler wasn't running</term><listitem><para>
+Make very sure that OProfile was actually up and running when you ran
+the binary.
+</para></listitem></varlistentry>
+<varlistentry><term>binary didn't run long enough</term><listitem><para>
+Remember OProfile is a statistical profiler - you're not guaranteed to
+get samples for short-running programs. You can help this by using a
+lower count for the performance counter, so there are a lot more samples
+taken per second.
+</para></listitem></varlistentry>
+<varlistentry><term>binary spent most of its time in libraries</term><listitem><para>
+Similarly, if the binary spends little time in the main binary image
+itself, with most of it spent in shared libraries it uses, you might
+not see any samples for the binary image itself. You can check this
+by using <command>opcontrol --separate=lib</command> before the
+profiling session, so <command>opreport</command> and friends show
+the library profiles on a per-application basis.
+</para></listitem></varlistentry>
+<varlistentry><term>specification was really too strict</term><listitem><para>
+For example, you specified something like <option>tgid:3433</option>,
+but no task with that group ID ever ran the code.
+</para></listitem></varlistentry>
+<varlistentry><term>binary didn't generate any events</term><listitem><para>
+If you're using a particular event counter, for example counting MMX
+operations, the code might simply have not generated any events in the
+first place. Verify the code you're profiling does what you expect it
+to.
+</para></listitem></varlistentry>
+<varlistentry><term>you didn't specify kernel module name correctly</term><listitem><para>
+If you're using 2.6 kernels, and trying to get reports for a kernel
+module, make sure to use the <option>-p</option> option, and specify the
+module name <emphasis>with</emphasis> the <filename>.ko</filename>
+extension. Check if the module is one loaded from initrd.
+</para></listitem></varlistentry>
+</variablelist>
+
+</sect2>
+
+</sect1> <!-- profile-spec -->
+
+<sect1 id="opreport">
+<title>Image summaries and symbol summaries (<command>opreport</command>)</title>
+<para>
+The <command>opreport</command> utility is the primary utility you will use for 
+getting formatted data out of OProfile. It produces two types of data: image summaries
+and symbol summaries. An image summary lists the number of samples for individual
+binary images such as libraries or applications. Symbol summaries provide per-symbol
+profile data. In the following example, we're getting an image summary for the whole
+system:
+</para>
+<screen>
+$ opreport --long-filenames
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
+   905898 59.7415 /usr/lib/gcc-lib/i386-redhat-linux/3.2/cc1plus
+   214320 14.1338 /boot/2.6.0/vmlinux
+   103450  6.8222 /lib/i686/libc-2.3.2.so
+    60160  3.9674 /usr/local/bin/madplay
+    31769  2.0951 /usr/local/oprofile-pp/bin/oprofiled
+    26550  1.7509 /usr/lib/libartsflow.so.1.0.0
+    23906  1.5765 /usr/bin/as
+    18770  1.2378 /oprofile
+    15528  1.0240 /usr/lib/qt-3.0.5/lib/libqt-mt.so.3.0.5
+    11979  0.7900 /usr/X11R6/bin/XFree86
+    11328  0.7471 /bin/bash
+    ...
+</screen>
+<para>
+If we had specified <option>--symbols</option> in the previous command, we would have
+gotten a symbol summary of all the images across the entire system. We can restrict this to only
+part of the system profile; for example,
+below is a symbol summary of the OProfile daemon. Note that as we used
+<command>opcontrol --separate=kernel</command>, symbols from images that <command>oprofiled</command>
+has used are also shown.
+</para>
+<screen>
+$ opreport -l `which oprofiled` 2>/dev/null | more
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 23150
+vma      samples  %           image name               symbol name
+0804be10 14971    28.1993     oprofiled                odb_insert
+0804afdc 7144     13.4564     oprofiled                pop_buffer_value
+c01daea0 6113     11.5144     vmlinux                  __copy_to_user_ll
+0804b060 2816      5.3042     oprofiled                opd_put_sample
+0804b4a0 2147      4.0441     oprofiled                opd_process_samples
+0804acf4 1855      3.4941     oprofiled                opd_put_image_sample
+0804ad84 1766      3.3264     oprofiled                opd_find_image
+0804a5ec 1084      2.0418     oprofiled                opd_find_module
+0804ba5c 741       1.3957     oprofiled                odb_hash_add_node
+...
+</screen>
+
+<para>
+These are the two basic forms of output you are most likely to use regularly, but <command>opreport</command>
+can do a lot more than that, as described below.
+</para>
+
+<sect2 id="opreport-merging">
+<title>Merging separate profiles</title>
+
+<para>
+If you have used one of the <option>--separate=</option> options
+whilst profiling, there can be several separate profiles for
+a single binary image within a session. Normally the output
+will keep these images separated (so, for example, the image summary
+output shows library image summaries on a per-application basis,
+when using <option>--separate=lib</option>).
+Sometimes it can be useful to merge these results back together
+before getting results. The <option>--merge</option> option allows
+you to do that.
+</para>
+</sect2>
+
+<sect2 id="opreport-comparison">
+<title>Side-by-side multiple results</title>
+<para>
+If you have used multiple events when profiling, by default you get
+side-by-side results of each event's sample values from <command>opreport</command>.
+You can restrict which events to list by appropriate use of the
+<option>event:</option> profile specifications, etc.
+</para>
+</sect2>
+
+<sect2 id="opreport-callgraph">
+<title>Callgraph output</title>
+<para>
+When using the <option>opcontrol --callgraph</option> option, you can see what
+functions are calling other functions in the output. Consider the
+following program:
+</para>
+<screen>
+#include &lt;string.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;stdio.h&gt;
+
+#define SIZE 500000
+
+static int compare(const void *s1, const void *s2)
+{
+        return strcmp(s1, s2);
+}
+
+static void repeat(void)
+{
+        int i;
+        char *strings[SIZE];
+        char str[] = "abcdefghijklmnopqrstuvwxyz";
+
+        for (i = 0; i &lt; SIZE; ++i) {
+                strings[i] = strdup(str);
+                strfry(strings[i]);
+        }
+
+        qsort(strings, SIZE, sizeof(char *), compare);
+}
+
+int main()
+{
+        while (1)
+                repeat();
+}
+</screen>
+<para>
+When running with the call-graph option, OProfile will
+record the function stack every time it takes a sample.
+<command>opreport --callgraph</command> outputs an entry for each
+function, where each entry looks similar to:
+</para>
+<screen>
+samples  %        image name               symbol name
+  197       0.1548  cg                       main
+  127036   99.8452  cg                       repeat
+84590    42.5084  libc-2.3.2.so            strfry
+  84590    66.4838  libc-2.3.2.so            strfry [self]
+  39169    30.7850  libc-2.3.2.so            random_r
+  3475      2.7312  libc-2.3.2.so            __i686.get_pc_thunk.bx
+-------------------------------------------------------------------------------
+</screen>
+<para>
+Here the non-indented line is the function we're focussing upon
+(<function>strfry()</function>). This
+line is the same as you'd get from a normal <command>opreport</command>
+output.
+</para>
+<para>
+Above the non-indented line we find the functions that called this
+function (for example, <function>repeat()</function> calls
+<function>strfry()</function>). The samples and percentage values here
+refer to the number of times we took a sample where this call was found
+in the stack; the percentage is relative to all other callers of the
+function we're focussing on. Note that these values are
+<emphasis>not</emphasis> call counts; they only reflect the call stack
+every time a sample is taken; that is, if a call is found in the stack
+at the time of a sample, it is recorded in this count.
+</para>
+<para>
+Below the line are functions that are called by
+<function>strfry()</function> (called <emphasis>callees</emphasis>).
+It's clear here that <function>strfry()</function> calls
+<function>random_r()</function>. We also see a special entry with a
+"[self]" marker. This records the normal samples for the function, but
+the percentage becomes relative to all callees. This allows you to
+compare time spent in the function itself compared to functions it
+calls. Note that if a function calls itself, then it will appear in the
+list of callees of itself, but without the "[self]" marker; so recursive
+calls are still clearly separable.
+</para>
+<para>
+You may have noticed that the output lists <function>main()</function>
+as calling <function>strfry()</function>, but it's clear from the source
+that this doesn't actually happen. See <xref
+linkend="interpreting-callgraph" /> for an explanation.
+</para>
+
+</sect2> <!-- opreport-callgraph -->
+
+<sect2 id="opreport-diff">
+<title>Differential profiles with <command>opreport</command></title>
+
+<para>
+Often, we'd like to be able to compare two profiles. For example, when
+analysing the performance of an application, we'd like to make code
+changes and examine the effect of the change. This is supported in
+<command>opreport</command> by giving a profile specification that
+identifies two different profiles. The general form is:
+</para>
+<screen>
+$ opreport &lt;shared-spec&gt; { &lt;first-profile&gt; } { &lt;second-profile&gt; }
+</screen>
+<para>
+For each of the profiles, the shared section is prefixed, and then the
+specification is analysed. The usual parameters work both within the
+shared section, and in the sub-specification within the curly braces.
+</para>
+<para>
+A typical way to use this feature is with archives created with
+<command>oparchive</command>. Let's look at an example:
+</para>
+<screen>
+$ ./a
+$ oparchive -o orig ./a
+$ opcontrol --reset
+  # edit and recompile a
+$ ./a
+  # now compare the current profile of a with the archived profile
+$ opreport -xl ./a { archive:./orig } { }
+CPU: PIII, speed 863.233 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a
+unit mask of 0x00 (No unit mask) count 100000
+samples  %        diff %    symbol name
+92435    48.5366  +0.4999   a
+54226    ---      ---       c
+49222    25.8459  +++       d
+48787    25.6175  -2.2e-01  b
+</screen>
+<para>
+Note that we specified an empty second profile in the curly braces, as
+we wanted to use the current session; alternatively, we could
+have specified another archive, or a tgid etc. We specified the binary
+<command>a</command> in the shared section, so we matched that in both
+the profiles we're diffing.
+</para>
+<para>
+As in the normal output, the results are sorted by the number of
+samples, and the percentage field represents the relative percentage of
+the symbol's samples in the second profile.
+</para>
+<para>
+Notice the new column in the output. This value represents the
+percentage change of the relative percent between the first and the
+second profile: roughly, "how much more important this symbol is".
+Looking at the symbol <function>a()</function>, we can see that it took
+roughly the same amount of the total profile in both the first and the
+second profile. The function <function>c()</function> was not in the new
+profile, so has been marked with <function>---</function>. Note that the
+sample value is the number of samples in the first profile; since we're
+displaying results for the second profile, we don't list a percentage
+value for it, as it would be meaningless. <function>d()</function> is
+new in the second profile, and consequently marked with
+<function>+++</function>.
+</para>
+<para>
+When comparing profiles between different binaries, it should be clear
+that functions can change in terms of VMA and size. To avoid this
+problem, <command>opreport</command> considers a symbol to be the same
+if the symbol name, image name, and owning application name all match;
+any other factors are ignored. Note that the check for application name
+means that trying to compare library profiles between two different
+applications will not work as you might expect: each symbol will be
+considered different.
+</para>
+
+</sect2> <!-- opreport-diff -->
+
+<sect2 id="opreport-anon">
+<title>Anonymous executable mappings</title>
+<para>
+Many applications, typically ones involving dynamic compilation into
+machine code, have executable mappings that are not backed by an ELF
+file. <command>opreport</command> has basic support for showing the
+samples taken in these regions; for example:
+</para>
+<screen>
+$ opreport /usr/jre1.5.0/bin/java
+CPU: PIII, speed 863.195 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (clocks processor is not halted) with a unit mask of 0x00 (No unit mask) count 100000
+CPU_CLK_UNHALT...|
+  samples|      %|
+------------------
+    27344 100.000 java
+        CPU_CLK_UNHALT...|
+          samples|      %|
+        ------------------
+            27236  99.605 anon (tgid:12135 range:0xb2cb8000-0xb2e80000)
+              108  0.3949 java
+</screen>
+<para>
+Currently, there is no support for getting symbol-based summaries for
+such regions. Note that, since such mappings are dependent upon
+individual invocations of a binary, these mappings are always listed as
+a dependent image, even when using <option>--separate=none</option>.
+Equally, the results are not affected by the <option>--merge</option>
+option.
+</para>
+</sect2> <!-- opreport-anon -->
+
+<sect2 id="opreport-options">
+<title>Options for <command>opreport</command></title>
+
+<variablelist>
+<varlistentry><term><option>--accumulated / -a</option></term><listitem><para>
+Accumulate sample and percentage counts in the symbol list.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--callgraph / -c</option></term><listitem><para>
+Show callgraph information.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--debug-info / -g</option></term><listitem><para>
+Show source file and line for each symbol.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--demangle / -D none|normal|smart</option></term><listitem><para>
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--details / -d</option></term><listitem><para>
+Show per-instruction details for all selected symbols. Note that, for
+binaries without symbol information, the VMA values shown are raw file
+offsets for the image binary.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-symbols / -e [symbols]</option></term><listitem><para>
+Exclude all the symbols in the given comma-separated list.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--global-percent / -%</option></term><listitem><para>
+Make all percentages relative to the whole profile.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--help / -? / --usage</option></term><listitem><para>
+Show help message.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--image-path / -p [paths]</option></term><listitem><para>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--include-symbols / -i [symbols]</option></term><listitem><para>
+Only include symbols in the given comma-separated list.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--long-filenames / -f</option></term><listitem><para>
+Output full paths instead of basenames.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--merge / -m [lib,cpu,tid,tgid,unitmask,all]</option></term><listitem><para>
+Merge any profiles separated in a --separate session.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--no-header</option></term><listitem><para>
+Don't output a header detailing profiling parameters.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--output-file / -o [file]</option></term><listitem><para>
+Output to the given file instead of stdout.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--reverse-sort / -r</option></term><listitem><para>
+Reverse the sort from the default.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--show-address / -w</option></term><listitem><para>
+Show the VMA address of each symbol (off by default).
+</para></listitem></varlistentry>
+<varlistentry><term><option>--sort / -s [vma,sample,symbol,debug,image]</option></term><listitem><para>
+Sort the list of symbols by, respectively, symbol address,
+number of samples, symbol name, debug filename and line number,
+binary image filename.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--symbols / -l</option></term><listitem><para>
+List per-symbol information instead of a binary image summary.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--threshold / -t [percentage]</option></term><listitem><para>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
+Give verbose debugging output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--version / -v</option></term><listitem><para>
+Show version.
+</para></listitem></varlistentry>
+</variablelist>
+
+</sect2>
+
+</sect1> <!-- opreport -->
+
+<sect1 id="opannotate">
+<title>Outputting annotated source (<command>opannotate</command>)</title>
+<para>
+The <command>opannotate</command> utility generates annotated source files or assembly listings, optionally
+mixed with source.
+If you want to see the source file, the profiled application needs to have debug information, and the source
+must be available through this debug information. For GCC, you must use the <option>-g</option> option
+when you are compiling.
+If the binary doesn't contain sufficient debug information, you can still
+use <command>opannotate <option>--assembly</option></command> to get annotated assembly.
+</para>
+<para>
+Note that for the reason explained in <xref linkend="hardware-counters" /> the results can be
+inaccurate. The debug information itself can add other problems; for example, the line number for a symbol can be
+incorrect. Assembly instructions can be re-ordered and moved by the compiler, and this can lead to
+crediting source lines with samples not really "owned" by this line. Also see
+<xref linkend="interpreting" />.
+</para>
+<para>
+You can output the annotation to one single file, containing all the source found, using the
+<option>--source</option> option. You can use this in conjunction with <option>--assembly</option>
+to get combined source/assembly output.
+</para>
+<para>
+You can also output a directory of annotated source files that maintains the structure of
+the original sources. Each line in the annotated source is prepended with the samples
+for that line. Additionally, each symbol is annotated giving details for the symbol
+as a whole. An example:
+</para>
+<screen>
+$ opannotate --source --output-dir=annotated /usr/local/oprofile-pp/bin/oprofiled
+$ ls annotated/home/moz/src/oprofile-pp/daemon/
+opd_cookie.h  opd_image.c  opd_kernel.c  opd_sample_files.c  oprofiled.c
+</screen>
+<para>
+Line numbers are maintained in the source files, but each file has
+a footer appended describing the profiling details. The actual annotation
+looks something like this :
+</para>
+<screen>
+...
+               :static uint64_t pop_buffer_value(struct transient * trans)
+ 11510  1.9661 :{ /* pop_buffer_value total:  89901 15.3566 */
+               :        uint64_t val;
+               :
+ 10227  1.7469 :        if (!trans->remaining) {
+               :                fprintf(stderr, "BUG: popping empty buffer !\n");
+               :                exit(EXIT_FAILURE);
+               :        }
+               :
+               :        val = get_buffer_value(trans->buffer, 0);
+  2281  0.3896 :        trans->remaining--;
+  2296  0.3922 :        trans->buffer += kernel_pointer_size;
+               :        return val;
+ 10454  1.7857 :}
+...
+</screen>
+
+<para>
+The first number on each line is the number of samples, whilst the second is
+the relative percentage of total samples.
+</para>
+
+<sect2 id="opannotate-finding-source">
+<title>Locating source files</title>
+<para>
+Of course, <command>opannotate</command> needs to be able to locate the source files
+for the binary image(s) in order to produce output. Some binary images have debug
+information where the given source file paths are relative, not absolute. You can
+specify search paths to look for these files (similar to <command>gdb</command>'s
+<option>dir</option> command) with the <option>--search-dirs</option> option.
+</para>
+<para>
+Sometimes you may have a binary image which gives absolute paths for the source files,
+but you have the actual sources elsewhere (commonly, you've installed an SRPM for
+a binary on your system and you want annotation from an existing profile). You can
+use the <option>--base-dirs</option> option to redirect OProfile to look somewhere
+else for source files. For example, imagine we have a binary generated from a source
+file that is given in the debug information as <filename>/tmp/build/libfoo/foo.c</filename>,
+and you have the source tree matching that binary installed in <filename>/home/user/libfoo/</filename>.
+You can redirect OProfile to find <filename>foo.c</filename> correctly like this :
+</para>
+<screen>
+$ opannotate --source --base-dirs=/tmp/build/libfoo/ --search-dirs=/home/user/libfoo/ --output-dir=annotated/ /lib/libfoo.so
+</screen>
+<para>
+You can specify multiple (comma-separated) paths to both options.
+</para>
+</sect2>
+
+<sect2 id="opannotate-details">
+<title>Usage of <command>opannotate</command></title>
+
+<variablelist>
+<varlistentry><term><option>--assembly / -a</option></term><listitem><para>
+Output annotated assembly. If this is combined with --source, then mixed
+source / assembly annotations are output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--base-dirs / -b [paths]</option></term><listitem><para>
+Comma-separated list of path prefixes. This can be used to point OProfile to a
+different location for source files when the debug information specifies an
+absolute path on your system for the source that does not exist. The prefix
+is stripped from the debug source file paths, then searched in the search dirs
+specified by <option>--search-dirs</option>.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--demangle / -D none|normal|smart</option></term><listitem><para>
+none: no demangling. normal: use default demangler (default). smart: use
+pattern-matching to make C++ symbol demangling more readable.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-file [files]</option></term><listitem><para>
+Exclude all files in the given comma-separated list of glob patterns.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-symbols / -e [symbols]</option></term><listitem><para>
+Exclude all the symbols in the given comma-separated list.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--help / -? / --usage</option></term><listitem><para>
+Show help message.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--image-path / -p [paths]</option></term><listitem><para>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--include-file [files]</option></term><listitem><para>
+Only include files in the given comma-separated list of glob patterns.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--include-symbols / -i [symbols]</option></term><listitem><para>
+Only include symbols in the given comma-separated list.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--objdump-params [params]</option></term><listitem><para>
+Pass the given parameters as extra values when calling objdump.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--output-dir / -o [dir]</option></term><listitem><para>
+Output directory. This makes opannotate output one annotated file for each
+source file. This option can't be used in conjunction with --assembly.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--search-dirs / -d [paths]</option></term><listitem><para>
+Comma-separated list of paths to search for source files. This is useful to find
+source files when the debug information only contains relative paths.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--source / -s</option></term><listitem><para>
+Output annotated source. This requires debugging information to be available
+for the binaries.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--threshold / -t [percentage]</option></term><listitem><para>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
+Give verbose debugging output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--version / -v</option></term><listitem><para>
+Show version.
+</para></listitem></varlistentry>
+</variablelist>
+
+
+</sect2> <!-- opannotate-details -->
+
+</sect1> <!-- opannotate -->
+
+<sect1 id="opgprof">
+<title><command>gprof</command>-compatible output (<command>opgprof</command>)</title>
+<para>
+If you're familiar with the output produced by <command>GNU gprof</command>,
+you may find <command>opgprof</command> useful. It takes a single binary
+as an argument, and produces a <filename>gmon.out</filename> file for use
+with <command>gprof -p</command>. If call-graph profiling is enabled,
+then this is also included.
+</para>
+<screen>
+$ opgprof `which oprofiled` # generates gmon.out file
+$ gprof -p `which oprofiled` | head
+Flat profile:
+
+Each sample counts as 1 samples.
+  %   cumulative   self              self     total
+ time   samples   samples    calls  T1/call  T1/call  name
+ 33.13 206237.00 206237.00                             odb_insert
+ 22.67 347386.00 141149.00                             pop_buffer_value
+  9.56 406881.00 59495.00                             opd_put_sample
+  7.34 452599.00 45718.00                             opd_find_image
+  7.19 497327.00 44728.00                             opd_process_samples
+</screen>
+
+<sect2 id="opgprof-details">
+<title>Usage of <command>opgprof</command></title>
+
+<variablelist>
+<varlistentry><term><option>--help / -? / --usage</option></term><listitem><para>
+Show help message.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--image-path / -p [paths]</option></term><listitem><para>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--output-filename / -o [file]</option></term><listitem><para>
+Output to the given file instead of the default, gmon.out.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--threshold / -t [percentage]</option></term><listitem><para>
+Only output data for symbols that have more than the given percentage
+of total samples.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
+Give verbose debugging output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--version / -v</option></term><listitem><para>
+Show version.
+</para></listitem></varlistentry>
+</variablelist>
+
+</sect2> <!-- opgprof-details -->
+
+</sect1> <!-- opgprof -->
+
+<sect1 id="oparchive">
+<title>Archiving measurements (<command>oparchive</command>)</title>
+<para>
+	The <command>oparchive</command> utility generates a directory populated
+	with executable, debug, and oprofile sample files. This directory can be
+	moved to another machine via <command>tar</command> and analyzed without
+	further use of the data collection machine.
+</para>
+
+<para>
+	The following command would collect the sample files, the executables
+	associated with the sample files, and the debuginfo files associated
+	with the executables and copy them into
+	<filename>/tmp/current_data</filename>:
+</para>
+
+<screen>
+# oparchive -o /tmp/current_data
+</screen>
+
+<sect2 id="oparchive-details">
+<title>Usage of <command>oparchive</command></title>
+
+<variablelist>
+<varlistentry><term><option>--help / -? / --usage</option></term><listitem><para>
+Show help message.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--exclude-dependent / -x</option></term><listitem><para>
+Do not include application-specific images for libraries, kernel modules
+and the kernel. This option only makes sense if the profile session
+used --separate.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--image-path / -p [paths]</option></term><listitem><para>
+Comma-separated list of additional paths to search for binaries.
+This is needed to find modules in kernels 2.6 and upwards.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--output-directory / -o [directory]</option></term><listitem><para>
+Output to the given directory. There is no default. This must be specified.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--verbose / -V [options]</option></term><listitem><para>
+Give verbose debugging output.
+</para></listitem></varlistentry>
+<varlistentry><term><option>--version / -v</option></term><listitem><para>
+Show version.
+</para></listitem></varlistentry>
+</variablelist>
+
+</sect2> <!-- oparchive-details -->
+
+</sect1> <!-- oparchive -->
+
+</chapter>
+
+<chapter id="interpreting">
+<title>Interpreting profiling results</title>
+<para>
+The standard caveats of profiling apply in interpreting the results from OProfile:
+profile realistic situations, profile different scenarios, profile
+for as long a time as possible, avoid system-specific artifacts, don't trust
+the profile data too much. Also bear in mind the comments on the performance
+counters above - you <emphasis>cannot</emphasis> rely on totally accurate
+instruction-level profiling.  However, for almost all circumstances the data
+can be useful. Ideally a utility such as Intel's VTUNE would be available to
+allow careful instruction-level analysis; go hassle Intel for this, not me ;)
+</para>
+<sect1 id="irq-latency">
+<title>Profiling interrupt latency</title>
+<para>
+This is an example of how the latency of delivery of profiling interrupts
+can impact the reliability of the profiling data. This is pretty much a 
+worst-case-scenario example: these problems are fairly rare.
+</para>
+<screen>
+double fun(double a, double b, double c)
+{
+ double result = 0;
+ for (int i = 0 ; i &lt; 10000; ++i) {
+  result += a;
+  result *= b;
+  result /= c;
+ }
+ return result;
+}
+</screen>
+<para>
+Here the last instruction of the loop is very costly, and you would expect the result
+to reflect that - but (cutting the instructions inside the loop):
+</para>
+<screen>
+$ opannotate -a -t 10 ./a.out
+
+     88 15.38% : 8048337:       fadd   %st(3),%st
+     48 8.391% : 8048339:       fmul   %st(2),%st
+     68 11.88% : 804833b:       fdiv   %st(1),%st
+    368 64.33% : 804833d:       inc    %eax
+               : 804833e:       cmp    $0x270f,%eax
+               : 8048343:       jle    8048337
+</screen>
+<para>
+The problem comes from the x86 hardware; when the counter overflows the IRQ
+is asserted but the hardware has features that can delay the NMI interrupt:
+x86 hardware is synchronous (i.e. cannot interrupt during an instruction);
+there is also a latency when the IRQ is asserted, and the multiple
+execution units and the out-of-order model of modern x86 CPUs also cause
+problems. This is the same function, with annotation :
+</para>
+<screen>
+$ opannotate -s -t 10 ./a.out
+
+               :double fun(double a, double b, double c)
+               :{ /* _Z3funddd total:     572 100.0% */
+               : double result = 0;
+    368 64.33% : for (int i = 0 ; i &lt; 10000; ++i) {
+     88 15.38% :  result += a;
+     48 8.391% :  result *= b;
+     68 11.88% :  result /= c;
+               : }
+               : return result;
+               :}
+</screen>
+<para>
+The conclusion: don't trust samples coming at the end of a loop,
+particularly if the last instruction generated by the compiler is costly. This
+case can also occur for branches. Always bear in mind that samples
+can be delayed by a few cycles from their real position. That's a hardware
+problem and OProfile can do nothing about it.
+</para>
+</sect1>
+<sect1 id="kernel-profiling">
+<title>Kernel profiling</title>
+<sect2 id="irq-masking">
+<title>Interrupt masking</title>
+<para>
+OProfile uses non-maskable interrupts (NMI) on the P6 generation, Pentium 4,
+Athlon and Duron processors. These interrupts can occur even in sections of the
+Linux kernel where interrupts are disabled, allowing collection of samples in virtually
+all executable code.  The RTC, timer interrupt mode, and Itanium 2 collection mechanisms
+use maskable interrupts. Thus, the RTC and Itanium 2 data collection mechanisms have "sample
+shadows", or blind spots: regions where no samples will be collected. Typically, the samples
+will be attributed to the code immediately after the interrupts are re-enabled.
+</para>
+</sect2>
+<sect2 id="idle">
+<title>Idle time</title>
+<para>
+Your kernel is likely to support halting the processor when a CPU is idle. As
+the typical hardware events like <constant>CPU_CLK_UNHALTED</constant> do not
+count when the CPU is halted, the kernel profile will not reflect the actual
+amount of time spent idle. You can change this behaviour by booting with
+the <option>idle=poll</option> option, which uses a different idle routine. This
+will appear as <function>poll_idle()</function> in your kernel profile.
+</para>
+</sect2>
+<sect2 id="kernel-modules">
+<title>Profiling kernel modules</title>
+<para>
+OProfile profiles kernel modules by default. However, there are a couple of problems
+you may have when trying to get results. First, you may have booted via an initrd;
+this means that the actual path for the module binaries cannot be determined automatically.
+To get around this, you can use the <option>-p</option> option to the profiling tools
+to specify where to look for the kernel modules.
+</para>
+<para>
+In 2.6, the information on where kernel module binaries are located has been removed.
+This means OProfile needs guiding with the <option>-p</option> option to find your
+modules. Normally, you can just use your standard module top-level directory for this.
+Note that due to this problem, OProfile cannot check that the modification times match;
+it is your responsibility to make sure you do not modify a binary after a profile
+has been created.
+</para>
+<para>
+If you have run <command>insmod</command> or <command>modprobe</command> to insert a module
+in a particular directory, it is important that you specify this directory with the 
+<option>-p</option> option first, so that it over-rides an older module binary that might
+exist in other directories you've specified with <option>-p</option>. It is up to you
+to make sure that these values are correct: 2.6 kernels simply do not provide enough
+information for OProfile to get this information.
+</para>
+</sect2>
+</sect1>
+
+<sect1 id="interpreting-callgraph">
+<title>Interpreting call-graph profiles</title>
+<para>
+Sometimes the results from call-graph profiles may be different to what
+you expect to see. The first thing to check is whether the target
+binaries were compiled with frame pointers enabled (if the binary was
+compiled using <command>gcc</command>'s
+<option>-fomit-frame-pointer</option> option, you will not get
+meaningful results). Note that as of this writing, the GCC developers
+plan to disable frame pointers by default. The Linux kernel is built
+without frame pointers by default; there is a configuration option you
+can use to turn it on under the "Kernel Hacking" menu.
+</para>
+<para>
+Often you may see a caller of a function that does not actually directly
+call the function you're looking at (e.g. if <function>a()</function>
+calls <function>b()</function>, which in turn calls
+<function>c()</function>, you may see an entry for
+<function>a()->c()</function>).  What's actually occurring is that we
+are taking samples at the very start (or the very end) of
+<function>c()</function>; at these few instructions, we haven't yet
+created the new function's frame, so it appears as if
+<function>a()</function> is calling directly into
+<function>c()</function>. Be careful not to be misled by these
+entries.
+</para>
+<para>
+Like the rest of OProfile, call-graph profiling uses a statistical
+approach; this means that sometimes a backtrace sample is truncated, or
+even partially wrong. Bear this in mind when examining results.
+</para>
+<!--  FIXME: what do we need here ? -->
+</sect1>
+
+<sect1 id="debug-info">
+<title>Inaccuracies in annotated source</title>
+<sect2 id="effect-of-optimizations">
+<title>Side effects of optimizations</title>
+<para>
+The compiler can introduce some pitfalls in the annotated source output.
+The optimizer can move pieces of code in such a manner that two lines of code
+are interlaced (instruction scheduling). Also debug info generated by the compiler 
+can show strange behavior. This is especially true for complex expressions e.g. inside
+an if statement:
+</para>
+<screen>
+	if (a &amp;&amp; ..
+	    b &amp;&amp; ..
+	    c &amp;&amp;)
+</screen>
+<para>
+Here the problem comes from the position of the line number. The available debug
+info does not give enough details for the if condition, so all samples are
+accumulated at the position of the right brace of the expression. Using
+<command>opannotate <option>-a</option></command> can help to show the real
+samples at an assembly level.
+</para>
+</sect2>
+<sect2 id="prologues">
+<title>Prologues and epilogues</title>
+<para>
+The compiler generally needs to generate "glue" code across function calls, dependent
+on the particular function call conventions used. Additionally other things
+need to happen, like stack pointer adjustment for the local variables; this
+code is known as the function prologue. Similar code is needed at function return,
+and is known as the function epilogue. This will show up in annotations as
+samples at the very start and end of a function, where there is no apparent
+executable code in the source.
+</para>
+</sect2>
+<sect2 id="inlined-function">
+<title>Inlined functions</title>
+<para>
+You may see that a function is credited with a certain number of samples, but
+the listing does not add up to the correct total. To pick a real example :
+</para>
+<screen>
+               :internal_sk_buff_alloc_security(struct sk_buff *skb)
+ 353 2.342%    :{ /* internal_sk_buff_alloc_security total: 1882 12.48% */
+               :
+               :        sk_buff_security_t *sksec;
+  15 0.0995%   :        int rc = 0;
+               :
+  10 0.06633%  :        sksec = skb-&gt;lsm_security;
+ 468 3.104%    :        if (sksec &amp;&amp; sksec-&gt;magic == DSI_MAGIC) {
+               :                goto out;
+               :        }
+               :
+               :        sksec = (sk_buff_security_t *) get_sk_buff_memory(skb);
+   3 0.0199%   :        if (!sksec) {
+  38 0.2521%   :                rc = -ENOMEM;
+               :                goto out;
+  10 0.06633%  :        }
+               :        memset(sksec, 0, sizeof (sk_buff_security_t));
+  44 0.2919%   :        sksec-&gt;magic = DSI_MAGIC;
+  32 0.2123%   :        sksec-&gt;skb = skb;
+  45 0.2985%   :        sksec-&gt;sid = DSI_SID_NORMAL;
+  31 0.2056%   :        skb-&gt;lsm_security = sksec;
+               :
+               :      out:
+               :
+ 146 0.9685%   :        return rc;
+               :
+  98 0.6501%   :}
+</screen>
+<para>
+Here, the function is credited with 1,882 samples, but the annotations
+below do not account for this. This is usually because of inline functions -
+the compiler marks such code with debug entries for the inline function
+definition, and this is where <command>opannotate</command> annotates
+such samples. In the case above, <function>memset</function> is the most
+likely candidate for this problem. Examining the mixed source/assembly
+output can help identify such results.
+</para>
+<para>
+When running <command>opannotate</command>, you may get a warning
+"some functions compiled without debug information may have incorrect source line attributions".
+In some rare cases, OProfile is not able to verify that the derived source line
+is correct (when some parts of the binary image are compiled without debugging
+information). Be wary of results if this warning appears.
+</para>
+<para>
+Furthermore, for some languages the compiler can implicitly generate functions,
+such as default copy constructors. Such functions are labelled by the compiler
+as having a line number of 0, which means the source annotation can be confusing.
+</para>
+<!-- FIXME so what *actually* happens to those samples ? ignored ? -->
+</sect2>
+<sect2 id="wrong-linenr-info">
+<title>Inaccuracy in line number information</title>
+<para>
+Depending on your compiler you can fall into the following problem:
+</para>
+<screen>
+struct big_object { int a[500]; };
+
+int main()
+{
+	big_object a, b;
+	for (int i = 0 ; i != 1000 * 1000; ++i)
+		b = a;
+	return 0;
+}
+
+</screen>
+<para>
+Compiled with <command>gcc</command> 3.0.4 the annotated source is clearly inaccurate:
+</para>
+<screen>
+               :int main()
+               :{  /* main total: 7871 100% */
+               :        big_object a, b;
+               :        for (int i = 0 ; i != 1000 * 1000; ++i)
+               :                b = a;
+ 7871 100%     :        return 0;
+               :}
+</screen>
+<para>
+The problem here is distinct from the IRQ latency problem; the debug line number
+information is not precise enough; again, looking at output of <command>opannotate -as</command> can help.
+</para>
+<screen>
+               :int main()
+               :{
+               :        big_object a, b;
+               :        for (int i = 0 ; i != 1000 * 1000; ++i)
+               : 80484c0:       push   %ebp
+               : 80484c1:       mov    %esp,%ebp
+               : 80484c3:       sub    $0xfac,%esp
+               : 80484c9:       push   %edi
+               : 80484ca:       push   %esi
+               : 80484cb:       push   %ebx
+               :                b = a;
+               : 80484cc:       lea    0xfffff060(%ebp),%edx
+               : 80484d2:       lea    0xfffff830(%ebp),%eax
+               : 80484d8:       mov    $0xf423f,%ebx
+               : 80484dd:       lea    0x0(%esi),%esi
+               :        return 0;
+    3 0.03811% : 80484e0:       mov    %edx,%edi
+               : 80484e2:       mov    %eax,%esi
+    1 0.0127%  : 80484e4:       cld
+    8 0.1016%  : 80484e5:       mov    $0x1f4,%ecx
+ 7850 99.73%   : 80484ea:       repz movsl %ds:(%esi),%es:(%edi)
+    9 0.1143%  : 80484ec:       dec    %ebx
+               : 80484ed:       jns    80484e0
+               : 80484ef:       xor    %eax,%eax
+               : 80484f1:       pop    %ebx
+               : 80484f2:       pop    %esi
+               : 80484f3:       pop    %edi
+               : 80484f4:       leave
+               : 80484f5:       ret
+</screen>
+<para>
+So here it's clear that copying is correctly credited with all of the samples, but the
+line number information is misplaced. <command>objdump -dS</command> exposes the
+same problem. Note that maintaining accurate debug information for compilers when optimizing is difficult, so this problem is not surprising.
+The problem of debug information
+accuracy is also dependent on the binutils version used; some BFD library versions
+contain a work-around for known problems of <command>gcc</command>, some others do not. This is unfortunate but we must live with that,
+since profiling is pointless when you disable optimisation (which would give better debugging entries).
+</para>
+</sect2>
+</sect1>
+<sect1 id="symbol-without-debug-info">
+<title>Assembly functions</title>
+<para>
+Often the assembler cannot generate debug information automatically.
+This means that you cannot get a source report unless 
+you manually define the necessary debug information; read your assembler documentation for how you might
+do that. The only
+debugging info needed currently by OProfile is the line-number/filename-VMA association. When profiling assembly
+without debugging info you can always get a report for symbols, and optionally for VMA, through <command>opreport -l</command>
+or <command>opreport -d</command>, but this works only for symbols with the right attributes.
+For <command>gas</command> you can get this by
+</para>
+<screen>
+.globl foo
+	.type	foo,@function
+</screen>
+<para> 
+whilst for <command>nasm</command> you must use
+</para>
+<screen>
+	  GLOBAL foo:function		; [1]
+</screen>
+<para>
+Note that OProfile does not need the global attribute, only the function attribute.
+</para>
+</sect1>
+<!-- 
+
+FIXME: I commented this bit out until we've written something ...
+
+improve this ? but look first why this file is special 
+<sect2 id="small-functions">
+<title>Small functions</title>
+<para>
+Very small functions can show strange behavior. The file in your source
+directory of OProfile <filename>$SRC/test-oprofile/understanding/puzzle.c</filename>
+show such example
+</para>
+</sect2>
+--> 
+<sect1 id="hidden-cost">
+<title>Other discrepancies</title>
+<para>
+Another cause of apparent problems is the hidden cost of instructions. A very
+common example is two memory reads: one from L1 cache and the other from memory:
+the second memory read is likely to have more samples.
+There are many other causes of hidden cost of instructions. A non-exhaustive
+list: mis-predicted branch, TLB cache miss, partial register stall,
+partial register dependencies, memory mismatch stall, re-executed µops. If you want to write
+programs at the assembly level, be sure to take a look at the Intel and
+AMD documentation at <ulink url="http://developer.intel.com/">http://developer.intel.com/</ulink>
+and <ulink url="http://www.amd.com/products/cpg/athlon/techdocs/">http://www.amd.com/products/cpg/athlon/techdocs/</ulink>.
+</para>
+</sect1>
+</chapter>
+
+<chapter id="ack">
+<title>Acknowledgments</title>
+<para>
+Thanks to (in no particular order) : Arjan van de Ven, Rik van Riel, Juan Quintela, Philippe Elie,
+Phillipp Rumpf, Tigran Aivazian, Alex Brown, Alisdair Rawsthorne, Bob Montgomery, Ray Bryant, H.J. Lu,
+Jeff Esper, Will Cohen, Graydon Hoare, Cliff Woolley, Alex Tsariounov, Al Stone, Jason Yeh,
+Randolph Chung, Anton Blanchard, Richard Henderson, Andries Brouwer, Bryan Rittmeyer,
+Maynard P. Johnson,
+Richard Reich (rreich@rdrtech.com), Zwane Mwaikambo, Dave Jones, Charles Filtness; and finally Pulp, for "Intro".
+</para>
+</chapter>
+
+</book>
diff --git a/doc/srcdoc/Doxyfile.in b/doc/srcdoc/Doxyfile.in
new file mode 100644
index 0000000..1f76ff4
--- /dev/null
+++ b/doc/srcdoc/Doxyfile.in
@@ -0,0 +1,184 @@
+# Doxyfile 1.2.13-20020210
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = @PACKAGE@
+PROJECT_NUMBER         = @VERSION@
+OUTPUT_DIRECTORY       =
+OUTPUT_LANGUAGE        = English
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        = 
+INTERNAL_DOCS          = NO
+STRIP_CODE_COMMENTS    = NO
+CASE_SENSE_NAMES       = YES
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+VERBATIM_HEADERS       = YES
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = YES
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = NO
+GENERATE_TESTLIST      = NO
+GENERATE_BUGLIST       = NO
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SHOW_USED_FILES        = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = ../../
+FILE_PATTERNS          = *.cpp *.c *.h
+RECURSIVE              = YES
+EXCLUDE                = ../../module
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = *.moc.cpp *.moc.h oprof_start.base.cpp
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = 
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = NO
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = NO
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = YES
+EXPAND_ONLY_PREDEF     = YES
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references   
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HAVE_DOT               = YES
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+TEMPLATE_RELATIONS     = YES
+HIDE_UNDOC_RELATIONS   = YES
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine   
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
+#CGI_NAME               = search.cgi
+#CGI_URL                = 
+#DOC_URL                = 
+#DOC_ABSPATH            = 
+#BIN_ABSPATH            = /usr/local/bin/
+#EXT_DOC_PATHS          = 
diff --git a/doc/srcdoc/Makefile b/doc/srcdoc/Makefile
new file mode 100644
index 0000000..e848669
--- /dev/null
+++ b/doc/srcdoc/Makefile
@@ -0,0 +1,9 @@
+DOXYGEN=doxygen
+# 'all' and 'clean' are commands, not files, so always run them when asked.
+.PHONY: all clean
+# Rebuild the source documentation from scratch; 'clean' runs first as a prerequisite.
+all: clean
+	$(DOXYGEN) Doxyfile
+# Remove the generated HTML tree (Doxyfile.in sets HTML_OUTPUT = html).
+clean:
+	rm -rf html/
diff --git a/doc/xsl/catalog-1.xml.in b/doc/xsl/catalog-1.xml.in
new file mode 100644
index 0000000..6ab6e7a
--- /dev/null
+++ b/doc/xsl/catalog-1.xml.in
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!DOCTYPE catalog PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN"
+	"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
+<!-- Catalog template: the at-sign delimited tokens are presumably substituted by configure. -->
+<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
+	<nextCatalog catalog="@XML_CATALOG@" />
+	<!-- Map the canonical DocBook XSL URLs imported by the stylesheets to a local copy. -->
+	@CAT_ENTRY_START@
+	<uri name="http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl"
+		uri="@DOCBOOK_ROOT@/xhtml/docbook.xsl"/>
+	<uri name="http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl"
+		uri="@DOCBOOK_ROOT@/xhtml/chunk.xsl"/>
+	@CAT_ENTRY_END@
+	<!-- Resolve the shared stylesheet from the source tree. -->
+	<uri name="xsl/xhtml-common.xsl" uri="@top_srcdir@/doc/xsl/xhtml-common.xsl"/>
+</catalog>
diff --git a/doc/xsl/xhtml-chunk.xsl b/doc/xsl/xhtml-chunk.xsl
new file mode 100644
index 0000000..b3320e8
--- /dev/null
+++ b/doc/xsl/xhtml-chunk.xsl
@@ -0,0 +1,52 @@
+<?xml version='1.0'?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+<xsl:import href="http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl"/>
+<xsl:import href="xhtml-common.xsl"/>
+ 
+<xsl:template name="process-chunk">
+  <xsl:param name="prev" select="."/>
+  <xsl:param name="next" select="."/>
+  <!-- Hands the context node's chunk content and file name to write.chunk.with.doctype; prev and next default to the context node. -->
+  <xsl:variable name="ischunk">
+    <xsl:call-template name="chunk"/>
+  </xsl:variable>
+  <!-- The file name comes from the chunk-filename mode and stays empty unless $ischunk is '1'. -->
+  <xsl:variable name="chunkfn">
+    <xsl:if test="$ischunk='1'">
+      <xsl:apply-templates mode="chunk-filename" select="."/>
+    </xsl:if>
+  </xsl:variable>
+  <!-- A '0' result only emits a message; the chunk is still written below. -->
+  <xsl:if test="$ischunk='0'">
+    <xsl:message>
+      <xsl:text>Error </xsl:text>
+      <xsl:value-of select="name(.)"/>
+      <xsl:text> is not a chunk!</xsl:text>
+    </xsl:message>
+  </xsl:if>
+  <!-- $base.dir is not declared in this file; presumably a parameter of the imported DocBook stylesheets. -->
+  <xsl:variable name="filename">
+    <xsl:call-template name="make-relative-filename">
+      <xsl:with-param name="base.dir" select="$base.dir"/>
+      <xsl:with-param name="base.name" select="$chunkfn"/>
+    </xsl:call-template>
+  </xsl:variable>
+ 
+<!-- FIXME: use Strict when the problems with width on td/th are
+  sorted out. Not yet. -->
+  <xsl:call-template name="write.chunk.with.doctype">
+    <xsl:with-param name="filename" select="$filename"/>
+    <xsl:with-param name="indent" select="'yes'"/>
+   <xsl:with-param name="doctype-public">-//W3C//DTD XHTML 1.0 Transitional//EN</xsl:with-param>
+   <xsl:with-param name="doctype-system">http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd</xsl:with-param>
+    <xsl:with-param name="content">
+      <xsl:call-template name="chunk-element-content">
+        <xsl:with-param name="prev" select="$prev"/>
+        <xsl:with-param name="next" select="$next"/>
+      </xsl:call-template>
+    </xsl:with-param>
+  </xsl:call-template>
+</xsl:template>
+ 
+</xsl:stylesheet>
diff --git a/doc/xsl/xhtml-common.xsl b/doc/xsl/xhtml-common.xsl
new file mode 100644
index 0000000..99f2bf7
--- /dev/null
+++ b/doc/xsl/xhtml-common.xsl
@@ -0,0 +1,55 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:param name="use.id.as.filename" select="1"/>
+<xsl:param name="section.autolabel" select="1"/>
+<xsl:param name="chapter.autolabel" select="1"/>
+<xsl:param name="ulink.target" select="''"/>
+<!-- "version" is supplied by the caller (doc/Makefile.am passes it as an xsltproc string parameter); each oprofileversion element is replaced by its value. -->
+<xsl:param name="version"/>
+<xsl:template match="oprofileversion">
+  <xsl:value-of select="$version"/>
+</xsl:template>
+
+<!-- Custom template for programlisting, screen and synopsis: wraps the
+     generated pre element in a one-cell table with a gray background. -->
+<xsl:template match="programlisting|screen|synopsis">
+  <xsl:param name="suppress-numbers" select="'0'"/>
+  <!-- Result of object.id; emitted below as an anchor target only when the element has an id attribute. -->
+  <xsl:variable name="id"><xsl:call-template name="object.id"/></xsl:variable>
+ 
+  <xsl:if test="@id">
+    <a id="{$id}"/>
+  </xsl:if>
+ 
+  <xsl:choose>
+    <xsl:when test="$suppress-numbers = '0'
+                    and @linenumbering = 'numbered'
+                    and $use.extensions != '0'
+                    and $linenumbering.extension != '0'">
+      <xsl:variable name="rtf">
+        <xsl:apply-templates/>
+      </xsl:variable>
+      <!-- Change the background color in the line below. -->
+      <table border="0" style="background: #E0E0E0;" width="90%">
+      <tr><td>
+      <pre class="{name(.)}">
+        <xsl:call-template name="number.rtf.lines">
+          <xsl:with-param name="rtf" select="$rtf"/>
+        </xsl:call-template>
+      </pre>
+      </td></tr></table>
+    </xsl:when>
+    <xsl:otherwise>
+      <!-- Change the background color in the line below. -->
+      <table border="0" style="background: #E0E0E0;" width="90%">
+      <tr><td>
+      <pre class="{name(.)}">
+        <xsl:apply-templates/>
+      </pre>
+      </td></tr></table>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template> 
+ 
+</xsl:stylesheet>
diff --git a/doc/xsl/xhtml.xsl b/doc/xsl/xhtml.xsl
new file mode 100644
index 0000000..31217a6
--- /dev/null
+++ b/doc/xsl/xhtml.xsl
@@ -0,0 +1,13 @@
+<?xml version='1.0'?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+xmlns:doc="http://nwalsh.com/xsl/documentation/1.0" version="1.0">
+<!-- Non-chunked XHTML driver: xhtml-common.xsl is imported last, so its definitions have higher import precedence than docbook.xsl's. -->
+<xsl:import href="http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl"/>
+<xsl:import href="xhtml-common.xsl"/>
+<!-- Serialize as indented ISO-8859-1 XML with the XHTML 1.0 Strict doctype. -->
+<xsl:output method="xml" encoding="ISO-8859-1" indent="yes"
+doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
+doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+/>
+ 
+</xsl:stylesheet>