tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum pytest to 3.1
diff --git a/tests/test_pickling.py b/tests/test_pickling.py
index 58d67a6..9aee705 100644
--- a/tests/test_pickling.py
+++ b/tests/test_pickling.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import pickling as m
 
 try:
@@ -22,7 +25,7 @@
     assert p2.extra2() == p.extra2()
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 @pytest.mark.parametrize("cls_name", ["PickleableWithDict", "PickleableWithDictNew"])
 def test_roundtrip_with_dict(cls_name):
     cls = getattr(m, cls_name)