tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum pytest to 3.1
diff --git a/tests/test_local_bindings.py b/tests/test_local_bindings.py
index 913cf0e..5460727 100644
--- a/tests/test_local_bindings.py
+++ b/tests/test_local_bindings.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import local_bindings as m
 
 
@@ -153,7 +155,7 @@
     assert m.local_cpp_types_addr() != cm.local_cpp_types_addr()
 
 
-@pytest.bug_in_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_stl_caster_vs_stl_bind(msg):
     """One module uses a generic vector caster from `<pybind11/stl.h>` while the other
     exports `std::vector<int>` via `py:bind_vector` and `py::module_local`"""