Improve locale support
diff --git a/include/fmt/core.h b/include/fmt/core.h
index 14df3c1..916562f 100644
--- a/include/fmt/core.h
+++ b/include/fmt/core.h
@@ -960,6 +960,22 @@
   }
 };
 
+// A type-erased reference to a std::locale; avoids the heavy <locale> include.
+class locale_ref {
+ private:
+  const void *locale_;  // A type-erased pointer to std::locale.
+  friend class locale;  // NOTE(review): confirm this friend is still needed.
+
+ public:
+  locale_ref() : locale_(FMT_NULL) {}  // Refers to no locale.
+
+  template <typename Locale>
+  explicit locale_ref(const Locale &loc);  // Stores &loc, not a copy.
+
+  template <typename Locale>
+  Locale get() const;  // Defined out of line in format-inl.h.
+};
+
 template <typename OutputIt, typename Context, typename Char>
 class context_base {
  public:
@@ -969,14 +985,16 @@
   basic_parse_context<Char> parse_context_;
   iterator out_;
   basic_format_args<Context> args_;
+  locale_ref loc_;
 
  protected:
   typedef Char char_type;
   typedef basic_format_arg<Context> format_arg;
 
   context_base(OutputIt out, basic_string_view<char_type> format_str,
-               basic_format_args<Context> ctx_args)
-  : parse_context_(format_str), out_(out), args_(ctx_args) {}
+               basic_format_args<Context> ctx_args,
+               locale_ref loc = locale_ref())  // Defaults to no locale.
+  : parse_context_(format_str), out_(out), args_(ctx_args), loc_(loc) {}
 
   // Returns the argument with specified index.
   format_arg do_get_arg(unsigned arg_id) {
@@ -1009,6 +1027,8 @@
 
   // Advances the begin iterator to ``it``.
   void advance_to(iterator it) { out_ = it; }
+
+  locale_ref locale() const { return loc_; }  // The stored locale reference.
 };
 
 template <typename Context, typename T>
@@ -1078,8 +1098,9 @@
    stored in the object so make sure they have appropriate lifetimes.
    */
   basic_format_context(OutputIt out, basic_string_view<char_type> format_str,
-                basic_format_args<basic_format_context> ctx_args)
-    : base(out, format_str, ctx_args) {}
+                       basic_format_args<basic_format_context> ctx_args,
+                       internal::locale_ref loc = internal::locale_ref())
+    : base(out, format_str, ctx_args, loc) {}
 
   format_arg next_arg() {
     return this->do_get_arg(this->parse_context().next_arg_id());
diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h
index 9a9d9db..513834f 100644
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@@ -203,25 +203,28 @@
 }
 
 #if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
-class locale {
- private:
-  std::locale locale_;
-
- public:
-  explicit locale(std::locale loc = std::locale()) : locale_(loc) {}
-  std::locale get() { return locale_; }
-};
-
 namespace internal {
+
+template <typename Locale>
+locale_ref::locale_ref(const Locale &loc) : locale_(&loc) {  // No copy made.
+  static_assert(std::is_same<Locale, std::locale>::value, "std::locale only");
+}
+
+template <typename Locale>
+Locale locale_ref::get() const {  // Yields std::locale() when unset.
+  static_assert(std::is_same<Locale, std::locale>::value, "std::locale only");
+  return locale_ ? *static_cast<const std::locale*>(locale_) : std::locale();
+}
+
 template <typename Char>
-FMT_FUNC Char thousands_sep_impl(locale_provider *lp) {
-  std::locale loc = lp ? lp->locale().get() : std::locale();
-  return std::use_facet<std::numpunct<Char>>(loc).thousands_sep();
+FMT_FUNC Char thousands_sep_impl(locale_ref loc) {  // Queries loc's numpunct.
+  return std::use_facet<std::numpunct<Char> >(
+    loc.get<std::locale>()).thousands_sep();
 }
 }
 #else
 template <typename Char>
-FMT_FUNC Char internal::thousands_sep(locale_provider *lp) {
+FMT_FUNC Char internal::thousands_sep(locale_ref) {  // Locale is unused.
   return FMT_STATIC_THOUSANDS_SEPARATOR;
 }
 #endif
@@ -959,10 +962,6 @@
   vprint(stdout, format_str, args);
 }
 
-#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
-FMT_FUNC locale locale_provider::locale() { return fmt::locale(); }
-#endif
-
 FMT_END_NAMESPACE
 
 #ifdef _MSC_VER
diff --git a/include/fmt/format.h b/include/fmt/format.h
index 66d8df5..5f055da 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -422,16 +422,6 @@
 }
 #endif
 
-// A wrapper around std::locale used to reduce compile times since <locale>
-// is very heavy.
-class locale;
-
-class locale_provider {
- public:
-  virtual ~locale_provider() {}
-  virtual fmt::locale locale();
-};
-
 // The number of characters to store in the basic_memory_buffer object itself
 // to avoid dynamic memory allocation.
 enum { inline_buffer_size = 500 };
@@ -1034,16 +1024,16 @@
 };
 
 template <typename Char>
-FMT_API Char thousands_sep_impl(locale_provider *lp);
+FMT_API Char thousands_sep_impl(locale_ref loc);  // Body is in format-inl.h.
 
 template <typename Char>
-inline Char thousands_sep(locale_provider *lp) {
-  return Char(thousands_sep_impl<char>(lp));
+inline Char thousands_sep(locale_ref loc) {
+  return Char(thousands_sep_impl<char>(loc));  // char separator, converted.
 }
 
 template <>
-inline wchar_t thousands_sep(locale_provider *lp) {
-  return thousands_sep_impl<wchar_t>(lp);
+inline wchar_t thousands_sep(locale_ref loc) {  // wchar_t specialization.
+  return thousands_sep_impl<wchar_t>(loc);
 }
 
 // Formats a decimal unsigned integer value writing into buffer.
@@ -1449,7 +1439,8 @@
   }
 
  public:
-  arg_formatter_base(Range r, format_specs *s): writer_(r), specs_(s) {}
+  arg_formatter_base(Range r, format_specs *s, locale_ref loc)
+    : writer_(r, loc), specs_(s) {}  // loc is passed on to the writer.
 
   iterator operator()(monostate) {
     FMT_ASSERT(false, "invalid argument type");
@@ -2320,7 +2311,7 @@
     \endrst
    */
   explicit arg_formatter(context_type &ctx, format_specs *spec = FMT_NULL)
-  : base(Range(ctx.out()), spec), ctx_(ctx) {}
+  : base(Range(ctx.out()), spec, ctx.locale()), ctx_(ctx) {}
 
   // Deprecated.
   arg_formatter(context_type &ctx, format_specs &spec)
@@ -2408,7 +2399,7 @@
 
  private:
   iterator out_;  // Output iterator.
-  std::unique_ptr<locale_provider> locale_;
+  internal::locale_ref locale_;  // Passed to thousands_sep() by on_num().
 
   iterator out() const { return out_; }
 
@@ -2608,7 +2599,7 @@
 
     void on_num() {
       unsigned num_digits = internal::count_digits(abs_value);
-      char_type sep = internal::thousands_sep<char_type>(writer.locale_.get());
+      char_type sep = internal::thousands_sep<char_type>(writer.locale_);
       unsigned size = num_digits + SEP_SIZE * ((num_digits - 1) / 3);
       writer.write_int(size, get_prefix(), spec,
                        num_writer{abs_value, size, sep});
@@ -2698,7 +2689,9 @@
 
  public:
   /** Constructs a ``basic_writer`` object. */
-  explicit basic_writer(Range out): out_(out.begin()) {}
+  explicit basic_writer(
+      Range out, internal::locale_ref loc = internal::locale_ref())
+    : out_(out.begin()), locale_(loc) {}  // loc defaults to no locale.
 
   void write(int value) { write_decimal(value); }
   void write(long value) { write_decimal(value); }
@@ -3226,8 +3219,9 @@
   typedef typename ArgFormatter::range range;
 
   format_handler(range r, basic_string_view<Char> str,
-                 basic_format_args<Context> format_args)
-    : context(r.begin(), str, format_args) {}
+                 basic_format_args<Context> format_args,
+                 internal::locale_ref loc)
+    : context(r.begin(), str, format_args, loc) {}  // loc goes to context.
 
   void on_text(const Char *begin, const Char *end) {
     auto size = internal::to_unsigned(end - begin);
@@ -3277,10 +3271,12 @@
 
 /** Formats arguments and writes the output to the range. */
 template <typename ArgFormatter, typename Char, typename Context>
-typename Context::iterator vformat_to(typename ArgFormatter::range out,
-                                      basic_string_view<Char> format_str,
-                                      basic_format_args<Context> args) {
-  format_handler<ArgFormatter, Char, Context> h(out, format_str, args);
+typename Context::iterator vformat_to(
+    typename ArgFormatter::range out,
+    basic_string_view<Char> format_str,
+    basic_format_args<Context> args,
+    internal::locale_ref loc = internal::locale_ref()) {  // Optional locale.
+  format_handler<ArgFormatter, Char, Context> h(out, format_str, args, loc);
   internal::parse_format_string<false>(format_str, h);
   return h.context.out();
 }
diff --git a/include/fmt/locale.h b/include/fmt/locale.h
new file mode 100644
index 0000000..9b8d29a
--- /dev/null
+++ b/include/fmt/locale.h
@@ -0,0 +1,47 @@
+// Formatting library for C++ - std::locale support
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_LOCALE_H_
+#define FMT_LOCALE_H_
+
+#include "format.h"
+#include <locale>
+
+FMT_BEGIN_NAMESPACE
+
+namespace internal {
+template <typename Char>
+typename buffer_context<Char>::type::iterator vformat_to(
+    const std::locale &loc, basic_buffer<Char> &buf,  // loc is not copied.
+    basic_string_view<Char> format_str,
+    basic_format_args<typename buffer_context<Char>::type> args) {
+  typedef back_insert_range<basic_buffer<Char> > range;
+  return vformat_to<arg_formatter<range> >(
+    buf, to_string_view(format_str), args, internal::locale_ref(loc));
+}
+
+template <typename Char>
+std::basic_string<Char> vformat(
+    const std::locale &loc, basic_string_view<Char> format_str,
+    basic_format_args<typename buffer_context<Char>::type> args) {
+  basic_memory_buffer<Char> buffer;  // Receives the formatted output.
+  internal::vformat_to(loc, buffer, format_str, args);
+  return fmt::to_string(buffer);
+}
+}
+
+template <typename S, typename... Args>
+inline std::basic_string<FMT_CHAR(S)> format(
+    const std::locale &loc, const S &format_str, const Args &... args) {
+  return internal::vformat(  // Formats args with locale loc.
+    loc, to_string_view(format_str),
+    *internal::checked_args<S, Args...>(format_str, args...));
+}
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_LOCALE_H_
diff --git a/include/fmt/ostream.h b/include/fmt/ostream.h
index fa26f6d..84b31cc 100644
--- a/include/fmt/ostream.h
+++ b/include/fmt/ostream.h
@@ -1,6 +1,6 @@
 // Formatting library for C++ - std::ostream support
 //
-// Copyright (c) 2012 - 2016, Victor Zverovich
+// Copyright (c) 2012 - present, Victor Zverovich
 // All rights reserved.
 //
 // For the license information refer to format.h.
diff --git a/include/fmt/printf.h b/include/fmt/printf.h
index b50c8fa..e8caeca 100644
--- a/include/fmt/printf.h
+++ b/include/fmt/printf.h
@@ -243,7 +243,8 @@
    */
   printf_arg_formatter(internal::basic_buffer<char_type> &buffer,
                        format_specs &spec, context_type &ctx)
-    : base(back_insert_range<internal::basic_buffer<char_type>>(buffer), &spec),
+    : base(back_insert_range<internal::basic_buffer<char_type>>(buffer), &spec,
+           ctx.locale()),
       context_(ctx) {}
 
   template <typename T>