make arithmetic operators of enum_ optional (#508)

Following commit 90d278, the size of the object code generated for the
Python bindings of nanogui (github.com/wjakob/nanogui) went up by a whopping
12%. It turns out that the project has quite a few enums for which we don't
really care about arithmetic operators.

This commit thus partially reverts the effects of #503 by introducing
an additional attribute py::arithmetic() that must be specified if the
arithmetic operators are desired.
diff --git a/docs/advanced/cast/overview.rst b/docs/advanced/cast/overview.rst
index e9f43be..8408572 100644
--- a/docs/advanced/cast/overview.rst
+++ b/docs/advanced/cast/overview.rst
@@ -77,7 +77,7 @@
 
 +------------------------------------+---------------------------+-------------------------------+
 |  Data type                         |  Description              | Header file                   |
-+=---================================+===========================+===============================+
++====================================+===========================+===============================+
 | ``int8_t``, ``uint8_t``            | 8-bit integers            | :file:`pybind11/pybind11.h`   |
 +------------------------------------+---------------------------+-------------------------------+
 | ``int16_t``, ``uint16_t``          | 16-bit integers           | :file:`pybind11/pybind11.h`   |
diff --git a/docs/classes.rst b/docs/classes.rst
index 300816d..3e8f2ee 100644
--- a/docs/classes.rst
+++ b/docs/classes.rst
@@ -393,4 +393,18 @@
     1L
 
 
+.. note::
+
+    When the special tag ``py::arithmetic()`` is passed to the ``enum_``
+    constructor, pybind11 creates an enumeration that also supports rudimentary
+    arithmetic and bit-level operations such as ordering comparisons (``<``,
+    ``>``, ``<=``, ``>=``), and, or, xor, bitwise negation (``~``), etc.
+
+    .. code-block:: cpp
+
+        py::enum_<Pet::Kind>(pet, "Kind", py::arithmetic())
+           ...
+
+    By default, these operators are omitted to keep the generated binary small.
+
 .. [#f1] Stateless closures are those with an empty pair of brackets ``[]`` as the capture object.
diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h
index d728210..2e6dec1 100644
--- a/include/pybind11/attr.h
+++ b/include/pybind11/attr.h
@@ -47,6 +47,9 @@
 /// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class
 struct dynamic_attr { };
 
+/// Annotation to mark enums as an arithmetic type
+struct arithmetic { };
+
 NAMESPACE_BEGIN(detail)
 /* Forward declarations */
 enum op_id : int;
@@ -306,6 +309,11 @@
     static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; }
 };
 
+
+/// Process an 'arithmetic' attribute for enums (does nothing here)
+template <>
+struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
+
 /***
  * Process a keep_alive call policy -- invokes keep_alive_impl during the
  * pre-call handler if both Nurse, Patient != 0 and use the post-call handler
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index 6804fa9..0079052 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -1187,51 +1187,62 @@
 template <typename Type> class enum_ : public class_<Type> {
 public:
     using class_<Type>::def;
-    using UnderlyingType = typename std::underlying_type<Type>::type;
+    using Scalar = typename std::underlying_type<Type>::type;
+    template <typename T> using arithmetic_tag = std::is_same<T, arithmetic>;
+
     template <typename... Extra>
     enum_(const handle &scope, const char *name, const Extra&... extra)
       : class_<Type>(scope, name, extra...), m_parent(scope) {
-        auto entries = new std::unordered_map<UnderlyingType, const char *>();
+
+        constexpr bool is_arithmetic =
+            !std::is_same<detail::first_of_t<arithmetic_tag, void, Extra...>,
+                          void>::value;
+
+        auto entries = new std::unordered_map<Scalar, const char *>();
         def("__repr__", [name, entries](Type value) -> std::string {
-            auto it = entries->find((UnderlyingType) value);
+            auto it = entries->find((Scalar) value);
             return std::string(name) + "." +
                 ((it == entries->end()) ? std::string("???")
                                         : std::string(it->second));
         });
-        def("__init__", [](Type& value, UnderlyingType i) { value = (Type)i; });
-        def("__init__", [](Type& value, UnderlyingType i) { new (&value) Type((Type) i); });
-        def("__int__", [](Type value) { return (UnderlyingType) value; });
+        def("__init__", [](Type& value, Scalar i) { value = (Type)i; });
+        def("__init__", [](Type& value, Scalar i) { new (&value) Type((Type) i); });
+        def("__int__", [](Type value) { return (Scalar) value; });
         def("__eq__", [](const Type &value, Type *value2) { return value2 && value == *value2; });
         def("__ne__", [](const Type &value, Type *value2) { return !value2 || value != *value2; });
-        def("__lt__", [](const Type &value, Type *value2) { return value2 && value < *value2; });
-        def("__gt__", [](const Type &value, Type *value2) { return value2 && value > *value2; });
-        def("__le__", [](const Type &value, Type *value2) { return value2 && value <= *value2; });
-        def("__ge__", [](const Type &value, Type *value2) { return value2 && value >= *value2; });
-        if (std::is_convertible<Type, UnderlyingType>::value) {
+        if (is_arithmetic) {
+            def("__lt__", [](const Type &value, Type *value2) { return value2 && value < *value2; });
+            def("__gt__", [](const Type &value, Type *value2) { return value2 && value > *value2; });
+            def("__le__", [](const Type &value, Type *value2) { return value2 && value <= *value2; });
+            def("__ge__", [](const Type &value, Type *value2) { return value2 && value >= *value2; });
+        }
+        if (std::is_convertible<Type, Scalar>::value) {
             // Don't provide comparison with the underlying type if the enum isn't convertible,
             // i.e. if Type is a scoped enum, mirroring the C++ behaviour.  (NB: we explicitly
-            // convert Type to UnderlyingType below anyway because this needs to compile).
-            def("__eq__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value == value2; });
-            def("__ne__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value != value2; });
-            def("__lt__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value < value2; });
-            def("__gt__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value > value2; });
-            def("__le__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value <= value2; });
-            def("__ge__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value >= value2; });
-            def("__invert__", [](const Type &value) { return ~((UnderlyingType) value); });
-            def("__and__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value & value2; });
-            def("__or__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value | value2; });
-            def("__xor__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value ^ value2; });
-            def("__rand__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value & value2; });
-            def("__ror__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value | value2; });
-            def("__rxor__", [](const Type &value, UnderlyingType value2) { return (UnderlyingType) value ^ value2; });
-            def("__and__", [](const Type &value, const Type &value2) { return (UnderlyingType) value & (UnderlyingType) value2; });
-            def("__or__", [](const Type &value, const Type &value2) { return (UnderlyingType) value | (UnderlyingType) value2; });
-            def("__xor__", [](const Type &value, const Type &value2) { return (UnderlyingType) value ^ (UnderlyingType) value2; });
+            // convert Type to Scalar below anyway because this needs to compile).
+            def("__eq__", [](const Type &value, Scalar value2) { return (Scalar) value == value2; });
+            def("__ne__", [](const Type &value, Scalar value2) { return (Scalar) value != value2; });
+            if (is_arithmetic) {
+                def("__lt__", [](const Type &value, Scalar value2) { return (Scalar) value < value2; });
+                def("__gt__", [](const Type &value, Scalar value2) { return (Scalar) value > value2; });
+                def("__le__", [](const Type &value, Scalar value2) { return (Scalar) value <= value2; });
+                def("__ge__", [](const Type &value, Scalar value2) { return (Scalar) value >= value2; });
+                def("__invert__", [](const Type &value) { return ~((Scalar) value); });
+                def("__and__", [](const Type &value, Scalar value2) { return (Scalar) value & value2; });
+                def("__or__", [](const Type &value, Scalar value2) { return (Scalar) value | value2; });
+                def("__xor__", [](const Type &value, Scalar value2) { return (Scalar) value ^ value2; });
+                def("__rand__", [](const Type &value, Scalar value2) { return (Scalar) value & value2; });
+                def("__ror__", [](const Type &value, Scalar value2) { return (Scalar) value | value2; });
+                def("__rxor__", [](const Type &value, Scalar value2) { return (Scalar) value ^ value2; });
+                def("__and__", [](const Type &value, const Type &value2) { return (Scalar) value & (Scalar) value2; });
+                def("__or__", [](const Type &value, const Type &value2) { return (Scalar) value | (Scalar) value2; });
+                def("__xor__", [](const Type &value, const Type &value2) { return (Scalar) value ^ (Scalar) value2; });
+            }
         }
-        def("__hash__", [](const Type &value) { return (UnderlyingType) value; });
+        def("__hash__", [](const Type &value) { return (Scalar) value; });
         // Pickling and unpickling -- needed for use with the 'multiprocessing' module
-        def("__getstate__", [](const Type &value) { return pybind11::make_tuple((UnderlyingType) value); });
-        def("__setstate__", [](Type &p, tuple t) { new (&p) Type((Type) t[0].cast<UnderlyingType>()); });
+        def("__getstate__", [](const Type &value) { return pybind11::make_tuple((Scalar) value); });
+        def("__setstate__", [](Type &p, tuple t) { new (&p) Type((Type) t[0].cast<Scalar>()); });
         m_entries = entries;
     }
 
@@ -1249,11 +1260,11 @@
     /// Add an enumeration entry
     enum_& value(char const* name, Type value) {
         this->attr(name) = pybind11::cast(value, return_value_policy::copy);
-        (*m_entries)[(UnderlyingType) value] = name;
+        (*m_entries)[(Scalar) value] = name;
         return *this;
     }
 private:
-    std::unordered_map<UnderlyingType, const char *> *m_entries;
+    std::unordered_map<Scalar, const char *> *m_entries;
     handle m_parent;
 };
 
diff --git a/tests/test_enum.cpp b/tests/test_enum.cpp
index 70a694a..09f334c 100644
--- a/tests/test_enum.cpp
+++ b/tests/test_enum.cpp
@@ -44,22 +44,20 @@
 test_initializer enums([](py::module &m) {
     m.def("test_scoped_enum", &test_scoped_enum);
 
-    py::enum_<UnscopedEnum>(m, "UnscopedEnum")
+    py::enum_<UnscopedEnum>(m, "UnscopedEnum", py::arithmetic())
         .value("EOne", EOne)
         .value("ETwo", ETwo)
         .export_values();
 
-    py::enum_<ScopedEnum>(m, "ScopedEnum")
+    py::enum_<ScopedEnum>(m, "ScopedEnum", py::arithmetic())
         .value("Two", ScopedEnum::Two)
-        .value("Three", ScopedEnum::Three)
-        ;
+        .value("Three", ScopedEnum::Three);
 
-    py::enum_<Flags>(m, "Flags")
+    py::enum_<Flags>(m, "Flags", py::arithmetic())
         .value("Read", Flags::Read)
         .value("Write", Flags::Write)
         .value("Execute", Flags::Execute)
         .export_values();
-        ;
 
     py::class_<ClassWithUnscopedEnum> exenum_class(m, "ClassWithUnscopedEnum");
     exenum_class.def_static("test_function", &ClassWithUnscopedEnum::test_function);