tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum pytest to 3.1
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 66bec55..b6f9d18 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 import os
+
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import eval_ as m
 
 
@@ -15,7 +19,7 @@
     assert m.test_eval_failure()
 
 
-@pytest.unsupported_on_pypy3
+@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
 def test_eval_file():
     filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
     assert m.test_eval_file(filename)