ci: harden chrono test, mark another macOS 3.9 dev failure (#2448)

* ci: harden chrono test, mark another macOS 3.9 dev failure

This should reduce some of the flakiness seen in the timing test.

* Update tests/test_chrono.py

* Mark a second GIL test as xfail; it can also fail
diff --git a/tests/test_gil_scoped.py b/tests/test_gil_scoped.py
index c85eb7c..27122cc 100644
--- a/tests/test_gil_scoped.py
+++ b/tests/test_gil_scoped.py
@@ -54,6 +54,8 @@
         thread.join()
 
 
+# TODO: FIXME, sometimes returns -11 instead of 0
+@pytest.mark.xfail("env.PY > (3,8) and env.MACOS", strict=False)
 def test_python_to_cpp_to_python_from_thread():
     """Makes sure there is no GIL deadlock when running in a thread.
 
@@ -72,6 +74,8 @@
     assert _run_in_process(_python_to_cpp_to_python_from_threads, 8, parallel=True) == 0
 
 
+# TODO: FIXME, can also fail intermittently; see the xfail condition below
+@pytest.mark.xfail("env.PY > (3,8) and env.MACOS", strict=False)
 def test_python_to_cpp_to_python_from_thread_multiple_sequential():
     """Makes sure there is no GIL deadlock when running in a thread multiple times sequentially.