tests: cleanup and ci hardening (#2397)

* tests: refactor and cleanup

* refactor: more consistent

* tests: vendor six

* tests: more xfails, nicer system

* tests: simplify to info

* tests: suggestions from @YannickJadoul and @bstaletic

* tests: restore some pypy tests that now pass

* tests: rename info to env

* tests: strict False/True

* tests: drop explicit strict=True again

* tests: reduce minimum PyTest to 3.1
diff --git a/tests/test_class.py b/tests/test_class.py
index bbf8481..4214fe7 100644
--- a/tests/test_class.py
+++ b/tests/test_class.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import class_ as m
 from pybind11_tests import UserType, ConstructorStats
 
@@ -261,7 +263,7 @@
     assert b.vec == [123, 456]
 
 
-@pytest.unsupported_on_pypy
+@pytest.mark.xfail("env.PYPY")
 def test_class_refcount():
     """Instances must correctly increase/decrease the reference count of their types (#1029)"""
     from sys import getrefcount
@@ -307,8 +309,8 @@
         assert p % 1024 == 0
 
 
-# https://bitbucket.org/pypy/pypy/issues/2742
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2742
+@pytest.mark.xfail("env.PYPY")
 def test_final():
     with pytest.raises(TypeError) as exc_info:
         class PyFinalChild(m.IsFinal):
@@ -316,8 +318,8 @@
     assert str(exc_info.value).endswith("is not an acceptable base type")
 
 
-# https://bitbucket.org/pypy/pypy/issues/2742
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2742
+@pytest.mark.xfail("env.PYPY")
 def test_non_final_final():
     with pytest.raises(TypeError) as exc_info:
         class PyNonFinalFinalChild(m.IsNonFinalFinal):