tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum pytest to 3.1
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 17f2a5e..2a077c6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -266,8 +266,9 @@
   if(pytest_not_found)
     message(FATAL_ERROR "Running the tests requires pytest. Please install it manually"
                         " (try: ${PYTHON_EXECUTABLE} -m pip install pytest)")
-  elseif(pytest_version VERSION_LESS 3.0)
-    message(FATAL_ERROR "Running the tests requires pytest >= 3.0. Found: ${pytest_version}"
+  elseif(pytest_version VERSION_LESS 3.1)
+    # message() concatenates its arguments without a separator, hence the trailing ". ".
+    message(FATAL_ERROR "Running the tests requires pytest >= 3.1. Found: ${pytest_version}. "
                         "Please update it (try: ${PYTHON_EXECUTABLE} -m pip install -U pytest)")
   endif()
   set(PYBIND11_PYTEST_FOUND
diff --git a/tests/conftest.py b/tests/conftest.py
index 45a264a..8b6e47d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,24 +5,21 @@
 Adds docstring and exceptions message sanitizers: ignore Python 2 vs 3 differences.
 """
 
-import pytest
-import textwrap
-import difflib
-import re
-import sys
 import contextlib
-import platform
+import difflib
 import gc
+import re
+import textwrap
+
+import pytest
+
+# Early diagnostic for failed imports
+import pybind11_tests  # noqa: F401
 
 _unicode_marker = re.compile(r'u(\'[^\']*\')')
 _long_marker = re.compile(r'([0-9])L')
 _hexadecimal = re.compile(r'0x[0-9a-fA-F]+')
 
-# test_async.py requires support for async and await
-collect_ignore = []
-if sys.version_info[:2] < (3, 5):
-    collect_ignore.append("test_async.py")
-
 
 def _strip_and_dedent(s):
     """For triple-quote strings"""
@@ -192,63 +189,5 @@
 
 
 def pytest_configure():
-    """Add import suppression and test requirements to `pytest` namespace"""
-    try:
-        import numpy as np
-    except ImportError:
-        np = None
-    try:
-        import scipy
-    except ImportError:
-        scipy = None
-    try:
-        from pybind11_tests.eigen import have_eigen
-    except ImportError:
-        have_eigen = False
-
-    # Provide simple `six`-like aliases.
-    pytest.PY2 = (sys.version_info.major == 2)
-    pytest.CPYTHON = (platform.python_implementation() == "CPython")
-    pytest.PYPY = (platform.python_implementation() == "PyPy")
-
-    skipif = pytest.mark.skipif
     pytest.suppress = suppress
-    pytest.requires_numpy = skipif(not np, reason="numpy is not installed")
-    pytest.requires_scipy = skipif(not np, reason="scipy is not installed")
-    pytest.requires_eigen_and_numpy = skipif(not have_eigen or not np,
-                                             reason="eigen and/or numpy are not installed")
-    pytest.requires_eigen_and_scipy = skipif(
-        not have_eigen or not scipy, reason="eigen and/or scipy are not installed")
-    pytest.unsupported_on_pypy = skipif(pytest.PYPY, reason="unsupported on PyPy")
-    pytest.bug_in_pypy = pytest.mark.xfail(pytest.PYPY, reason="bug in PyPy")
-    pytest.unsupported_on_pypy3 = skipif(pytest.PYPY and not pytest.PY2,
-                                         reason="unsupported on PyPy3")
-    pytest.unsupported_on_pypy_lt_6 = skipif(pytest.PYPY and sys.pypy_version_info[0] < 6,
-                                             reason="unsupported on PyPy<6")
-    pytest.unsupported_on_py2 = skipif(pytest.PY2,
-                                       reason="unsupported on Python 2.x")
     pytest.gc_collect = gc_collect
-
-
-def _test_import_pybind11():
-    """Early diagnostic for test module initialization errors
-
-    When there is an error during initialization, the first import will report the
-    real error while all subsequent imports will report nonsense. This import test
-    is done early (in the pytest configuration file, before any tests) in order to
-    avoid the noise of having all tests fail with identical error messages.
-
-    Any possible exception is caught here and reported manually *without* the stack
-    trace. This further reduces noise since the trace would only show pytest internals
-    which are not useful for debugging pybind11 module issues.
-    """
-    # noinspection PyBroadException
-    try:
-        import pybind11_tests  # noqa: F401 imported but unused
-    except Exception as e:
-        print("Failed to import pybind11_tests from pytest:")
-        print("  {}: {}".format(type(e).__name__, e))
-        sys.exit(1)
-
-
-_test_import_pybind11()
diff --git a/tests/env.py b/tests/env.py
new file mode 100644
index 0000000..f246b08
--- /dev/null
+++ b/tests/env.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+import platform
+import sys
+
+LINUX = sys.platform.startswith("linux")
+MACOS = sys.platform.startswith("darwin")
+WIN = sys.platform.startswith("win32") or sys.platform.startswith("cygwin")
+
+CPYTHON = platform.python_implementation() == "CPython"
+PYPY = platform.python_implementation() == "PyPy"
+
+PY2 = sys.version_info.major == 2
diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp
index bc7d2c3..76e0298 100644
--- a/tests/pybind11_tests.cpp
+++ b/tests/pybind11_tests.cpp
@@ -88,6 +88,4 @@
 
     for (const auto &initializer : initializers())
         initializer(m);
-
-    if (!py::hasattr(m, "have_eigen")) m.attr("have_eigen") = false;
 }
diff --git a/tests/pytest.ini b/tests/pytest.ini
index f209964..6d758ea 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,11 +1,14 @@
 [pytest]
-minversion = 3.0
+minversion = 3.1
 norecursedirs = test_cmake_build test_embed
+xfail_strict = True
 addopts =
     # show summary of skipped tests
     -rs
     # capture only Python print and C++ py::print, but not C output (low-level Python errors)
     --capture=sys
+    # enable all warnings
+    -Wa
 filterwarnings =
     # make warnings into errors but ignore certain third-party extension issues
     error
diff --git a/tests/test_async.py b/tests/test_async.py
index e9292c9..df4489c 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
-import asyncio
 import pytest
-from pybind11_tests import async_module as m
+
+asyncio = pytest.importorskip("asyncio")
+m = pytest.importorskip("pybind11_tests.async_module")
 
 
 @pytest.fixture
diff --git a/tests/test_buffers.py b/tests/test_buffers.py
index db1871e..d6adaf1 100644
--- a/tests/test_buffers.py
+++ b/tests/test_buffers.py
@@ -4,13 +4,12 @@
 
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import buffers as m
 from pybind11_tests import ConstructorStats
 
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 def test_from_python():
@@ -36,9 +35,7 @@
     assert cstats.move_assignments == 0
 
 
-# PyPy: Memory leak in the "np.array(m, copy=False)" call
-# https://bitbucket.org/pypy/pypy/issues/2444
-@pytest.unsupported_on_pypy
+# Was skipped on PyPy (leak in np.array): https://foss.heptapod.net/pypy/pypy/-/issues/2444
 def test_to_python():
     mat = m.Matrix(5, 4)
     assert memoryview(mat).shape == (5, 4)
@@ -73,7 +70,6 @@
     assert cstats.move_assignments == 0
 
 
-@pytest.unsupported_on_pypy
 def test_inherited_protocol():
     """SquareMatrix is derived from Matrix and inherits the buffer protocol"""
 
@@ -82,7 +78,6 @@
     assert np.asarray(matrix).shape == (5, 5)
 
 
-@pytest.unsupported_on_pypy
 def test_pointer_to_member_fn():
     for cls in [m.Buffer, m.ConstBuffer, m.DerivedBuffer]:
         buf = cls()
@@ -91,19 +86,17 @@
         assert value == 0x12345678
 
 
-@pytest.unsupported_on_pypy
 def test_readonly_buffer():
     buf = m.BufferReadOnly(0x64)
     view = memoryview(buf)
-    assert view[0] == b'd' if pytest.PY2 else 0x64
+    assert view[0] == (b'd' if env.PY2 else 0x64)  # parens needed: ternary binds looser than ==
     assert view.readonly
 
 
-@pytest.unsupported_on_pypy
 def test_selective_readonly_buffer():
     buf = m.BufferReadOnlySelect()
 
-    memoryview(buf)[0] = b'd' if pytest.PY2 else 0x64
+    memoryview(buf)[0] = b'd' if env.PY2 else 0x64
     assert buf.value == 0x64
 
     io.BytesIO(b'A').readinto(buf)
@@ -111,6 +104,6 @@
 
     buf.readonly = True
     with pytest.raises(TypeError):
-        memoryview(buf)[0] = b'\0' if pytest.PY2 else 0
+        memoryview(buf)[0] = b'\0' if env.PY2 else 0
     with pytest.raises(TypeError):
         io.BytesIO(b'1').readinto(buf)
diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py
index c905766..08d38bc 100644
--- a/tests/test_builtin_casters.py
+++ b/tests/test_builtin_casters.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import builtin_casters as m
 from pybind11_tests import UserType, IncType
 
@@ -117,10 +119,7 @@
     # Issue #816
 
     def to_bytes(s):
-        if pytest.PY2:
-            b = s
-        else:
-            b = s.encode("utf8")
+        b = s if env.PY2 else s.encode("utf8")
         assert isinstance(b, bytes)
         return b
 
@@ -197,7 +196,7 @@
     assert m.i64_str(-1) == "-1"
     assert m.i32_str(2000000000) == "2000000000"
     assert m.u32_str(2000000000) == "2000000000"
-    if pytest.PY2:
+    if env.PY2:
         assert m.i32_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
         assert m.i64_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
         assert m.i64_str(long(-999999999999)) == "-999999999999"  # noqa: F821 undefined name
@@ -219,7 +218,7 @@
         m.i32_str(3000000000)
     assert "incompatible function arguments" in str(excinfo.value)
 
-    if pytest.PY2:
+    if env.PY2:
         with pytest.raises(TypeError) as excinfo:
             m.u32_str(long(-1))  # noqa: F821 undefined name 'long'
         assert "incompatible function arguments" in str(excinfo.value)
@@ -360,9 +359,9 @@
     assert convert(A(False)) is False
 
 
-@pytest.requires_numpy
 def test_numpy_bool():
-    import numpy as np
+    np = pytest.importorskip("numpy")
+
     convert, noconvert = m.bool_passthrough, m.bool_passthrough_noconvert
 
     def cant_convert(v):
diff --git a/tests/test_call_policies.py b/tests/test_call_policies.py
index 0e3230c..ec005c1 100644
--- a/tests/test_call_policies.py
+++ b/tests/test_call_policies.py
@@ -1,9 +1,13 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import call_policies as m
 from pybind11_tests import ConstructorStats
 
 
+@pytest.mark.xfail("env.PYPY", reason="sometimes comes out 1 off on PyPy", strict=False)
 def test_keep_alive_argument(capture):
     n_inst = ConstructorStats.detail_reg_inst()
     with capture:
@@ -70,8 +74,8 @@
     """
 
 
-# https://bitbucket.org/pypy/pypy/issues/2447
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2447
+@pytest.mark.xfail("env.PYPY", reason="_PyObject_GetDictPtr is unimplemented")
 def test_alive_gc(capture):
     n_inst = ConstructorStats.detail_reg_inst()
     p = m.ParentGC()
diff --git a/tests/test_class.py b/tests/test_class.py
index bbf8481..4214fe7 100644
--- a/tests/test_class.py
+++ b/tests/test_class.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import class_ as m
 from pybind11_tests import UserType, ConstructorStats
 
@@ -261,7 +263,7 @@
     assert b.vec == [123, 456]
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_class_refcount():
     """Instances must correctly increase/decrease the reference count of their types (#1029)"""
     from sys import getrefcount
@@ -307,8 +309,8 @@
         assert p % 1024 == 0
 
 
-# https://bitbucket.org/pypy/pypy/issues/2742
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2742
+@pytest.mark.xfail("env.PYPY")
 def test_final():
     with pytest.raises(TypeError) as exc_info:
         class PyFinalChild(m.IsFinal):
@@ -316,8 +318,8 @@
     assert str(exc_info.value).endswith("is not an acceptable base type")
 
 
-# https://bitbucket.org/pypy/pypy/issues/2742
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2742
+@pytest.mark.xfail("env.PYPY")
 def test_non_final_final():
     with pytest.raises(TypeError) as exc_info:
         class PyNonFinalFinalChild(m.IsNonFinalFinal):
diff --git a/tests/test_eigen.cpp b/tests/test_eigen.cpp
index aba088d..56aa1a4 100644
--- a/tests/test_eigen.cpp
+++ b/tests/test_eigen.cpp
@@ -87,8 +87,6 @@
     using SparseMatrixR = Eigen::SparseMatrix<float, Eigen::RowMajor>;
     using SparseMatrixC = Eigen::SparseMatrix<float>;
 
-    m.attr("have_eigen") = true;
-
     // various tests
     m.def("double_col", [](const Eigen::VectorXf &x) -> Eigen::VectorXf { return 2.0f * x; });
     m.def("double_row", [](const Eigen::RowVectorXf &x) -> Eigen::RowVectorXf { return 2.0f * x; });
diff --git a/tests/test_eigen.py b/tests/test_eigen.py
index ae868da..ac68471 100644
--- a/tests/test_eigen.py
+++ b/tests/test_eigen.py
@@ -2,17 +2,15 @@
 import pytest
 from pybind11_tests import ConstructorStats
 
-pytestmark = pytest.requires_eigen_and_numpy
+np = pytest.importorskip("numpy")
+m = pytest.importorskip("pybind11_tests.eigen")
 
-with pytest.suppress(ImportError):
-    from pybind11_tests import eigen as m
-    import numpy as np
 
-    ref = np.array([[ 0.,  3,  0,  0,  0, 11],
-                    [22,  0,  0,  0, 17, 11],
-                    [ 7,  5,  0,  1,  0, 11],
-                    [ 0,  0,  0,  0,  0, 11],
-                    [ 0,  0, 14,  0,  8, 11]])
+ref = np.array([[ 0.,  3,  0,  0,  0, 11],
+                [22,  0,  0,  0, 17, 11],
+                [ 7,  5,  0,  1,  0, 11],
+                [ 0,  0,  0,  0,  0, 11],
+                [ 0,  0, 14,  0,  8, 11]])
 
 
 def assert_equal_ref(mat):
@@ -646,8 +644,8 @@
     assert str(excinfo.value) == 'Nonconformable matrices!'
 
 
-@pytest.requires_eigen_and_scipy
 def test_sparse():
+    pytest.importorskip("scipy")
     assert_sparse_equal_ref(m.sparse_r())
     assert_sparse_equal_ref(m.sparse_c())
     assert_sparse_equal_ref(m.sparse_copy_r(m.sparse_r()))
@@ -656,8 +654,8 @@
     assert_sparse_equal_ref(m.sparse_copy_c(m.sparse_r()))
 
 
-@pytest.requires_eigen_and_scipy
 def test_sparse_signature(doc):
+    pytest.importorskip("scipy")
     assert doc(m.sparse_copy_r) == """
         sparse_copy_r(arg0: scipy.sparse.csr_matrix[numpy.float32]) -> scipy.sparse.csr_matrix[numpy.float32]
     """  # noqa: E501 line too long
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 66bec55..b6f9d18 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 import os
+
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import eval_ as m
 
 
@@ -15,7 +19,7 @@
     assert m.test_eval_failure()
 
 
-@pytest.unsupported_on_pypy3
+@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
 def test_eval_file():
     filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
     assert m.test_eval_file(filename)
diff --git a/tests/test_factory_constructors.py b/tests/test_factory_constructors.py
index 49e6f4f..8465c59 100644
--- a/tests/test_factory_constructors.py
+++ b/tests/test_factory_constructors.py
@@ -2,6 +2,8 @@
 import pytest
 import re
 
+import env  # noqa: F401
+
 from pybind11_tests import factory_constructors as m
 from pybind11_tests.factory_constructors import tag
 from pybind11_tests import ConstructorStats
@@ -418,7 +420,7 @@
     """)
 
 
-@pytest.unsupported_on_py2
+@pytest.mark.skipif("env.PY2")
 def test_invalid_self():
     """Tests invocation of the pybind-registered base class with an invalid `self` argument.  You
     can only actually do this on Python 3: Python 2 raises an exception itself if you try."""
diff --git a/tests/test_kwargs_and_defaults.py b/tests/test_kwargs_and_defaults.py
index df354ad..5257e0c 100644
--- a/tests/test_kwargs_and_defaults.py
+++ b/tests/test_kwargs_and_defaults.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import kwargs_and_defaults as m
 
 
@@ -146,8 +149,7 @@
     """
 
 
-@pytest.mark.xfail(pytest.PYPY and pytest.PY2,
-                   reason="PyPy2 doesn't seem to double count")
+@pytest.mark.xfail("env.PYPY and env.PY2", reason="PyPy2 doesn't double count")
 def test_args_refcount():
     """Issue/PR #1216 - py::args elements get double-inc_ref()ed when combined with regular
     arguments"""
diff --git a/tests/test_local_bindings.py b/tests/test_local_bindings.py
index 913cf0e..5460727 100644
--- a/tests/test_local_bindings.py
+++ b/tests/test_local_bindings.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import local_bindings as m
 
 
@@ -153,7 +155,7 @@
     assert m.local_cpp_types_addr() != cm.local_cpp_types_addr()
 
 
-@pytest.bug_in_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_stl_caster_vs_stl_bind(msg):
     """One module uses a generic vector caster from `<pybind11/stl.h>` while the other
     exports `std::vector<int>` via `py:bind_vector` and `py::module_local`"""
diff --git a/tests/test_methods_and_attributes.py b/tests/test_methods_and_attributes.py
index 25a01c7..c296b68 100644
--- a/tests/test_methods_and_attributes.py
+++ b/tests/test_methods_and_attributes.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import methods_and_attributes as m
 from pybind11_tests import ConstructorStats
 
@@ -257,8 +260,8 @@
     assert os.value == 1
 
 
-# https://bitbucket.org/pypy/pypy/issues/2447
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2447
+@pytest.mark.xfail("env.PYPY")
 def test_dynamic_attributes():
     instance = m.DynamicClass()
     assert not hasattr(instance, "foo")
@@ -299,8 +302,8 @@
         assert cstats.alive() == 0
 
 
-# https://bitbucket.org/pypy/pypy/issues/2447
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2447
+@pytest.mark.xfail("env.PYPY")
 def test_cyclic_gc():
     # One object references itself
     instance = m.DynamicClass()
diff --git a/tests/test_multiple_inheritance.py b/tests/test_multiple_inheritance.py
index bb602f8..7a0259d 100644
--- a/tests/test_multiple_inheritance.py
+++ b/tests/test_multiple_inheritance.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import ConstructorStats
 from pybind11_tests import multiple_inheritance as m
 
@@ -11,7 +14,8 @@
     assert mt.bar() == 4
 
 
-@pytest.bug_in_pypy
+@pytest.mark.skipif("env.PYPY and env.PY2")
+@pytest.mark.xfail("env.PYPY and not env.PY2")
 def test_multiple_inheritance_mix1():
     class Base1:
         def __init__(self, i):
@@ -32,7 +36,6 @@
 
 
 def test_multiple_inheritance_mix2():
-
     class Base2:
         def __init__(self, i):
             self.i = i
@@ -51,7 +54,8 @@
     assert mt.bar() == 4
 
 
-@pytest.bug_in_pypy
+@pytest.mark.skipif("env.PYPY and env.PY2")
+@pytest.mark.xfail("env.PYPY and not env.PY2")
 def test_multiple_inheritance_python():
 
     class MI1(m.Base1, m.Base2):
@@ -256,7 +260,7 @@
         assert d.static_value == 0
 
 
-@pytest.unsupported_on_pypy_lt_6
+# Requires PyPy 6+
 def test_mi_dynamic_attributes():
     """Mixing bases with and without dynamic attribute support"""
 
diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py
index 1b6599d..ad3ca58 100644
--- a/tests/test_numpy_array.py
+++ b/tests/test_numpy_array.py
@@ -1,11 +1,11 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import numpy_array as m
 
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 def test_dtypes():
@@ -243,7 +243,6 @@
     """
 
 
-@pytest.unsupported_on_pypy
 def test_cast_numpy_int64_to_uint64():
     m.function_taking_uint64(123)
     m.function_taking_uint64(np.uint64(123))
@@ -424,7 +423,7 @@
     assert(b.shape == (8, 8))
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_array_create_and_resize(msg):
     a = m.create_and_resize(2)
     assert(a.size == 4)
@@ -436,7 +435,7 @@
     assert a.shape == (6,)
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_dtype_refcount_leak():
     from sys import getrefcount
     dtype = np.dtype(np.float_)
diff --git a/tests/test_numpy_dtypes.py b/tests/test_numpy_dtypes.py
index d173435..417d6f1 100644
--- a/tests/test_numpy_dtypes.py
+++ b/tests/test_numpy_dtypes.py
@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 import re
+
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import numpy_dtypes as m
 
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 @pytest.fixture(scope='module')
@@ -294,7 +295,7 @@
     assert 'dtype is already registered' in str(excinfo.value)
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_str_leak():
     from sys import getrefcount
     fmt = "f4"
diff --git a/tests/test_numpy_vectorize.py b/tests/test_numpy_vectorize.py
index bd3c013..54e44cd 100644
--- a/tests/test_numpy_vectorize.py
+++ b/tests/test_numpy_vectorize.py
@@ -2,10 +2,7 @@
 import pytest
 from pybind11_tests import numpy_vectorize as m
 
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 def test_vectorize(capture):
diff --git a/tests/test_pickling.py b/tests/test_pickling.py
index 58d67a6..9aee705 100644
--- a/tests/test_pickling.py
+++ b/tests/test_pickling.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import pickling as m
 
 try:
@@ -22,7 +25,7 @@
     assert p2.extra2() == p.extra2()
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 @pytest.mark.parametrize("cls_name", ["PickleableWithDict", "PickleableWithDictNew"])
 def test_roundtrip_with_dict(cls_name):
     cls = getattr(m, cls_name)
diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py
index 289b4aa..c21ad61 100644
--- a/tests/test_pytypes.py
+++ b/tests/test_pytypes.py
@@ -3,6 +3,8 @@
 import pytest
 import sys
 
+import env  # noqa: F401
+
 from pybind11_tests import pytypes as m
 from pybind11_tests import debug_enabled
 
@@ -113,7 +115,7 @@
     assert m.bytes_from_str().decode() == "bar"
 
     assert doc(m.bytes_from_str) == "bytes_from_str() -> {}".format(
-        "str" if pytest.PY2 else "bytes"
+        "str" if env.PY2 else "bytes"
     )
 
 
@@ -224,7 +226,7 @@
     # specifically to exercise pybind11::str::raw_str
     cvt = m.convert_to_pybind11_str
     assert cvt(u"Str") == u"Str"
-    assert cvt(b'Bytes') == u"Bytes" if pytest.PY2 else "b'Bytes'"
+    assert cvt(b'Bytes') == u"Bytes" if env.PY2 else "b'Bytes'"
     assert cvt(None) == u"None"
     assert cvt(False) == u"False"
     assert cvt(True) == u"True"
@@ -237,8 +239,8 @@
     assert cvt([28]) == u"[28]"
     assert cvt({}) == u"{}"
     assert cvt({3: 4}) == u"{3: 4}"
-    assert cvt(set()) == u"set([])" if pytest.PY2 else "set()"
-    assert cvt({3, 3}) == u"set([3])" if pytest.PY2 else "{3}"
+    assert cvt(set()) == u"set([])" if env.PY2 else "set()"
+    assert cvt({3, 3}) == u"set([3])" if env.PY2 else "{3}"
 
     valid_orig = u"DZ"
     valid_utf8 = valid_orig.encode("utf-8")
@@ -324,7 +326,7 @@
     view = method(*args)
     assert isinstance(view, memoryview)
     assert view.format == fmt
-    if isinstance(expected_view, bytes) or not pytest.PY2:
+    if isinstance(expected_view, bytes) or not env.PY2:
         view_as_list = list(view)
     else:
         # Using max to pick non-zero byte (big-endian vs little-endian).
@@ -332,9 +334,7 @@
     assert view_as_list == list(expected_view)
 
 
-@pytest.mark.skipif(
-    not hasattr(sys, 'getrefcount'),
-    reason='getrefcount is not available')
+@pytest.mark.xfail("env.PYPY", reason="getrefcount is not available")
 @pytest.mark.parametrize('method', [
     m.test_memoryview_object,
     m.test_memoryview_buffer_info,
@@ -352,7 +352,7 @@
     view = m.test_memoryview_from_buffer_empty_shape()
     assert isinstance(view, memoryview)
     assert view.format == 'B'
-    if pytest.PY2:
+    if env.PY2:
         # Python 2 behavior is weird, but Python 3 (the future) is fine.
         # PyPy3 has <memoryview, while CPython 2 has <memory
         assert bytes(view).startswith(b'<memory')
@@ -366,14 +366,14 @@
 
 
 def test_test_memoryview_from_buffer_nullptr():
-    if pytest.PY2:
+    if env.PY2:
         m.test_memoryview_from_buffer_nullptr()
     else:
         with pytest.raises(ValueError):
             m.test_memoryview_from_buffer_nullptr()
 
 
-@pytest.unsupported_on_py2
+@pytest.mark.skipif("env.PY2")
 def test_memoryview_from_memory():
     view = m.test_memoryview_from_memory()
     assert isinstance(view, memoryview)
diff --git a/tests/test_stl_binders.py b/tests/test_stl_binders.py
index 4cb4591..f9b8ea4 100644
--- a/tests/test_stl_binders.py
+++ b/tests/test_stl_binders.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 import pytest
-from pybind11_tests import stl_binders as m
 
-with pytest.suppress(ImportError):
-    import numpy as np
+import env  # noqa: F401
+
+from pybind11_tests import stl_binders as m
 
 
 def test_vector_int():
@@ -68,15 +68,14 @@
     assert len(v_int2) == 0
 
 
-# related to the PyPy's buffer protocol.
-@pytest.unsupported_on_pypy
+# Older PyPy versions failed here; the failure was related to PyPy's buffer protocol.
 def test_vector_buffer():
     b = bytearray([1, 2, 3, 4])
     v = m.VectorUChar(b)
     assert v[1] == 2
     v[2] = 5
     mv = memoryview(v)  # We expose the buffer interface
-    if not pytest.PY2:
+    if not env.PY2:
         assert mv[2] == 5
         mv[2] = 6
     else:
@@ -84,7 +83,7 @@
         mv[2] = '\x06'
     assert v[2] == 6
 
-    if not pytest.PY2:
+    if not env.PY2:
         mv = memoryview(b)
         v = m.VectorUChar(mv[::2])
         assert v[1] == 3
@@ -94,9 +93,8 @@
     assert "NumPy type info missing for " in str(excinfo.value)
 
 
-@pytest.unsupported_on_pypy
-@pytest.requires_numpy
 def test_vector_buffer_numpy():
+    np = pytest.importorskip("numpy")
     a = np.array([1, 2, 3, 4], dtype=np.int32)
     with pytest.raises(TypeError):
         m.VectorInt(a)
diff --git a/tests/test_virtual_functions.py b/tests/test_virtual_functions.py
index 0f2d85f..b7bd5ba 100644
--- a/tests/test_virtual_functions.py
+++ b/tests/test_virtual_functions.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import virtual_functions as m
 from pybind11_tests import ConstructorStats
 
@@ -160,7 +162,7 @@
 
 # PyPy: Reference count > 1 causes call with noncopyable instance
 # to fail in ncv1.print_nc()
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 @pytest.mark.skipif(not hasattr(m, "NCVirt"), reason="NCVirt test broken on ICPC")
 def test_move_support():
     class NCVirtExt(m.NCVirt):