tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum pytest to 3.1
diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py
index 1b6599d..ad3ca58 100644
--- a/tests/test_numpy_array.py
+++ b/tests/test_numpy_array.py
@@ -1,11 +1,11 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import numpy_array as m
 
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 def test_dtypes():
@@ -243,7 +243,6 @@
     """
 
 
-@pytest.unsupported_on_pypy
 def test_cast_numpy_int64_to_uint64():
     m.function_taking_uint64(123)
     m.function_taking_uint64(np.uint64(123))
@@ -424,7 +423,7 @@
     assert(b.shape == (8, 8))
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_array_create_and_resize(msg):
     a = m.create_and_resize(2)
     assert(a.size == 4)
@@ -436,7 +435,7 @@
     assert a.shape == (6,)
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_dtype_refcount_leak():
     from sys import getrefcount
     dtype = np.dtype(np.float_)