bpo-36842: Implement PEP 578 (GH-12613)

Adds sys.audit, sys.addaudithook, io.open_code, and associated C APIs.
diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst
index 27d3f76..fd3f691 100644
--- a/Doc/c-api/code.rst
+++ b/Doc/c-api/code.rst
@@ -40,6 +40,7 @@
    :c:func:`PyCode_New` directly can bind you to a precise Python
    version since the definition of the bytecode changes often.
 
+   .. audit-event:: code.__new__ "code filename name argcount kwonlyargcount nlocals stacksize flags"
 
 .. c:function:: PyCodeObject* PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
 
diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst
index defc859..543dc60 100644
--- a/Doc/c-api/file.rst
+++ b/Doc/c-api/file.rst
@@ -60,6 +60,32 @@
    raised if the end of the file is reached immediately.
 
 
+.. c:function:: int PyFile_SetOpenCodeHook(Py_OpenCodeHookFunction handler)
+
+   Overrides the normal behavior of :func:`io.open_code` to pass its parameter
+   through the provided handler.
+
+   The handler is a function of type :c:type:`PyObject *(\*)(PyObject *path,
+   void *userData)`, where *path* is guaranteed to be a :c:type:`PyUnicodeObject`.
+
+   The *userData* pointer is passed into the hook function. Since hook
+   functions may be called from different runtimes, this pointer should not
+   refer directly to Python state.
+
+   As this hook is intentionally used during import, avoid importing new modules
+   during its execution unless they are known to be frozen or available in
+   ``sys.modules``.
+
+   Once a hook has been set, it cannot be removed or replaced, and later calls to
+   :c:func:`PyFile_SetOpenCodeHook` will fail. On failure, the function returns
+   -1 and sets an exception if the interpreter has been initialized.
+
+   This function is safe to call before :c:func:`Py_Initialize`.
+
+   .. versionadded:: 3.8
+
+
+
 .. c:function:: int PyFile_WriteObject(PyObject *obj, PyObject *p, int flags)
 
    .. index:: single: Py_PRINT_RAW
diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst
index 04e169a..2091da6 100644
--- a/Doc/c-api/sys.rst
+++ b/Doc/c-api/sys.rst
@@ -289,6 +289,56 @@
    .. versionadded:: 3.2
 
 
+.. c:function:: int PySys_Audit(const char *event, const char *format, ...)
+
+   .. index:: single: audit events
+
+   Raises an auditing event with any active hooks. Returns zero for success
+   and non-zero with an exception set on failure.
+
+   If any hooks have been added, *format* and other arguments will be used
+   to construct a tuple to pass. Apart from ``N``, the same format characters
+   as used in :c:func:`Py_BuildValue` are available. If the built value is not
+   a tuple, it will be added into a single-element tuple. (The ``N`` format
+   option consumes a reference, but since there is no way to know whether
+   arguments to this function will be consumed, using it may cause reference
+   leaks.)
+
+   :func:`sys.audit` performs the same function from Python code.
+
+   .. versionadded:: 3.8
+
+
+.. c:function:: int PySys_AddAuditHook(Py_AuditHookFunction hook, void *userData)
+
+   .. index:: single: audit events
+
+   Adds to the collection of active auditing hooks. Returns zero for success
+   and non-zero on failure. If the runtime has been initialized, also sets an
+   error on failure. Hooks added through this API are called for all
+   interpreters created by the runtime.
+
+   This function is safe to call before :c:func:`Py_Initialize`. When called
+   after runtime initialization, existing audit hooks are notified and may
+   silently abort the operation by raising an error subclassed from
+   :class:`Exception` (other errors will not be silenced).
+
+   The hook function is of type :c:type:`int (*)(const char *event, PyObject
+   *args, void *userData)`, where *args* is guaranteed to be a
+   :c:type:`PyTupleObject`. The hook function is always called with the GIL
+   held by the Python interpreter that raised the event.
+
+   The *userData* pointer is passed into the hook function. Since hook
+   functions may be called from different runtimes, this pointer should not
+   refer directly to Python state.
+
+   See :pep:`578` for a detailed description of auditing. Functions in the
+   runtime and standard library that raise events include the details in each
+   function's documentation.
+
+   .. versionadded:: 3.8
+
+
 .. _processcontrol:
 
 Process Control
diff --git a/Doc/howto/instrumentation.rst b/Doc/howto/instrumentation.rst
index 50cde35..909deb5 100644
--- a/Doc/howto/instrumentation.rst
+++ b/Doc/howto/instrumentation.rst
@@ -332,6 +332,15 @@
    .. versionadded:: 3.7
 
 
+.. c:function:: audit(str event, void *tuple)
+
+   Fires when :func:`sys.audit` or :c:func:`PySys_Audit` is called.
+   ``arg0`` is the event name as a C string, ``arg1`` is a :c:type:`PyObject`
+   pointer to a tuple object.
+
+   .. versionadded:: 3.8
+
+
 SystemTap Tapsets
 -----------------
 
diff --git a/Doc/library/array.rst b/Doc/library/array.rst
index 4ac7bb5..1f95dd6 100644
--- a/Doc/library/array.rst
+++ b/Doc/library/array.rst
@@ -83,6 +83,7 @@
    to add initial items to the array.  Otherwise, the iterable initializer is
    passed to the :meth:`extend` method.
 
+   .. audit-event:: array.__new__ "typecode initializer"
 
 .. data:: typecodes
 
diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst
index 1c60b4b..97172c5 100644
--- a/Doc/library/ctypes.rst
+++ b/Doc/library/ctypes.rst
@@ -1509,6 +1509,17 @@
    :c:type:`int`, which is of course not always the truth, so you have to assign
    the correct :attr:`restype` attribute to use these functions.
 
+.. audit-event:: ctypes.dlopen name
+
+   Loading a library through any of these objects raises an
+   :ref:`auditing event <auditing>` ``ctypes.dlopen`` with string argument
+   ``name``, the name used to load the library.
+
+.. audit-event:: ctypes.dlsym "library name"
+
+   Accessing a function on a loaded library raises an auditing event
+   ``ctypes.dlsym`` with arguments ``library`` (the library object) and ``name``
+   (the symbol's name as a string or integer).
 
 .. _ctypes-foreign-functions:
 
@@ -2032,6 +2043,12 @@
       This method returns a ctypes type instance using the memory specified by
       *address* which must be an integer.
 
+      .. audit-event:: ctypes.cdata address
+
+         This method, and others that indirectly call this method, raises an
+         :func:`auditing event <sys.audit>` ``ctypes.cdata`` with argument
+         ``address``.
+
    .. method:: from_param(obj)
 
       This method adapts *obj* to a ctypes type.  It is called with the actual
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 1a9a8b5..7170a78 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -275,6 +275,12 @@
    If you want to parse Python code into its AST representation, see
    :func:`ast.parse`.
 
+   .. audit-event:: compile "source filename"
+
+      Raises an :func:`auditing event <sys.audit>` ``compile`` with arguments
+      ``source`` and ``filename``. This event may also be raised by implicit
+      compilation.
+
    .. note::
 
       When compiling a string with multi-line code in ``'single'`` or
@@ -473,6 +479,11 @@
    See :func:`ast.literal_eval` for a function that can safely evaluate strings
    with expressions containing only literals.
 
+   .. audit-event:: exec code_object
+
+      Raises an :func:`auditing event <sys.audit>` ``exec`` with the code object as
+      the argument. Code compilation events may also be raised.
+
 .. index:: builtin: exec
 
 .. function:: exec(object[, globals[, locals]])
@@ -502,6 +513,11 @@
    builtins are available to the executed code by inserting your own
    ``__builtins__`` dictionary into *globals* before passing it to :func:`exec`.
 
+   .. audit-event:: exec code_object
+
+      Raises an :func:`auditing event <sys.audit>` ``exec`` with the code object as
+      the argument. Code compilation events may also be raised.
+
    .. note::
 
       The built-in functions :func:`globals` and :func:`locals` return the current
@@ -747,6 +763,16 @@
    If the :mod:`readline` module was loaded, then :func:`input` will use it
    to provide elaborate line editing and history features.
 
+   .. audit-event:: builtins.input prompt
+
+      Raises an :func:`auditing event <sys.audit>` ``builtins.input`` with
+      argument ``prompt`` before reading input.
+
+   .. audit-event:: builtins.input/result result
+
+      Raises an auditing event ``builtins.input/result`` with the result after
+      successfully reading input.
+
 
 .. class:: int([x])
            int(x, base=10)
@@ -1176,6 +1202,11 @@
    (where :func:`open` is declared), :mod:`os`, :mod:`os.path`, :mod:`tempfile`,
    and :mod:`shutil`.
 
+   .. audit-event:: open "file mode flags"
+
+   The ``mode`` and ``flags`` arguments may have been modified or inferred from
+   the original call.
+
    .. versionchanged::
       3.3
 
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 0f12516..2fb27c3 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -120,6 +120,27 @@
 
    This is an alias for the builtin :func:`open` function.
 
+   .. audit-event:: open "path mode flags"
+
+      This function raises an :func:`auditing event <sys.audit>` ``open`` with
+      arguments ``path``, ``mode`` and ``flags``. The ``mode`` and ``flags``
+      arguments may have been modified or inferred from the original call.
+
+
+.. function:: open_code(path)
+
+   Opens the provided file with mode ``'rb'``. This function should be used
+   when the intent is to treat the contents as executable code.
+
+   ``path`` should be an absolute path.
+
+   The behavior of this function may be overridden by an earlier call to
+   :c:func:`PyFile_SetOpenCodeHook`. However, it should always be considered
+   interchangeable with ``open(path, 'rb')``. Overriding the behavior is
+   intended for additional validation or preprocessing of the file.
+
+   .. versionadded:: 3.8
+
 
 .. exception:: BlockingIOError
 
diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst
index 0f895d7..a82caf8 100644
--- a/Doc/library/mmap.rst
+++ b/Doc/library/mmap.rst
@@ -67,6 +67,7 @@
    will be relative to the offset from the beginning of the file. *offset*
    defaults to 0.  *offset* must be a multiple of the :const:`ALLOCATIONGRANULARITY`.
 
+   .. audit-event:: mmap.__new__ "fileno length access offset"
 
 .. class:: mmap(fileno, length, flags=MAP_SHARED, prot=PROT_WRITE|PROT_READ, access=ACCESS_DEFAULT[, offset])
    :noindex:
@@ -155,6 +156,7 @@
 
           mm.close()
 
+   .. audit-event:: mmap.__new__ "fileno length access offset"
 
    Memory-mapped file objects support the following methods:
 
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 0bbfce9..6df2b49 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -651,7 +651,7 @@
 File Object Creation
 --------------------
 
-This function creates new :term:`file objects <file object>`.  (See also
+These functions create new :term:`file objects <file object>`.  (See also
 :func:`~os.open` for opening file descriptors.)
 
 
@@ -829,11 +829,14 @@
    most *length* bytes in size.  As of Python 3.3, this is equivalent to
    ``os.truncate(fd, length)``.
 
+   .. audit-event:: os.truncate "fd length"
+
    .. availability:: Unix, Windows.
 
    .. versionchanged:: 3.5
       Added support for Windows
 
+
 .. function:: get_blocking(fd)
 
    Get the blocking mode of the file descriptor: ``False`` if the
@@ -845,6 +848,7 @@
 
    .. versionadded:: 3.5
 
+
 .. function:: isatty(fd)
 
    Return ``True`` if the file descriptor *fd* is open and connected to a
@@ -912,6 +916,8 @@
    This function can support :ref:`paths relative to directory descriptors
    <dir_fd>` with the *dir_fd* parameter.
 
+   .. audit-event:: open "path mode flags"
+
    .. versionchanged:: 3.4
       The new file descriptor is now non-inheritable.
 
@@ -2756,6 +2762,8 @@
 
    This function can support :ref:`specifying a file descriptor <path_fd>`.
 
+   .. audit-event:: os.truncate "path length"
+
    .. availability:: Unix, Windows.
 
    .. versionadded:: 3.3
@@ -3715,6 +3723,8 @@
    to using this function.  See the :ref:`subprocess-replacements` section in
    the :mod:`subprocess` documentation for some helpful recipes.
 
+   .. audit-event:: os.system command
+
    .. availability:: Unix, Windows.
 
 
diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
index 27721e6..f4c41ac 100644
--- a/Doc/library/pickle.rst
+++ b/Doc/library/pickle.rst
@@ -427,6 +427,7 @@
       how they can be loaded, potentially reducing security risks. Refer to
       :ref:`pickle-restrict` for details.
 
+      .. audit-event:: pickle.find_class "module name"
 
 .. _pickle-picklable:
 
diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst
index 379633a..e23a4f5 100644
--- a/Doc/library/socket.rst
+++ b/Doc/library/socket.rst
@@ -526,6 +526,8 @@
 
    The newly created socket is :ref:`non-inheritable <fd_inheritance>`.
 
+   .. audit-event:: socket.__new__ "self family type protocol"
+
    .. versionchanged:: 3.3
       The AF_CAN family was added.
       The AF_RDS family was added.
@@ -718,6 +720,8 @@
    :const:`AF_INET6`), and is meant to be passed to the :meth:`socket.connect`
    method.
 
+   .. audit-event:: socket.getaddrinfo "host port family type protocol"
+
    The following example fetches address information for a hypothetical TCP
    connection to ``example.org`` on port 80 (results may differ on your
    system if IPv6 isn't enabled)::
@@ -753,6 +757,8 @@
    interface. :func:`gethostbyname` does not support IPv6 name resolution, and
    :func:`getaddrinfo` should be used instead for IPv4/v6 dual stack support.
 
+   .. audit-event:: socket.gethostbyname hostname
+
 
 .. function:: gethostbyname_ex(hostname)
 
@@ -765,12 +771,16 @@
    resolution, and :func:`getaddrinfo` should be used instead for IPv4/v6 dual
    stack support.
 
+   .. audit-event:: socket.gethostbyname hostname
+
 
 .. function:: gethostname()
 
    Return a string containing the hostname of the machine where  the Python
    interpreter is currently executing.
 
+   .. audit-event:: socket.gethostname
+
    Note: :func:`gethostname` doesn't always return the fully qualified domain
    name; use :func:`getfqdn` for that.
 
@@ -785,6 +795,8 @@
    domain name, use the function :func:`getfqdn`. :func:`gethostbyaddr` supports
    both IPv4 and IPv6.
 
+   .. audit-event:: socket.gethostbyaddr ip_address
+
 
 .. function:: getnameinfo(sockaddr, flags)
 
@@ -798,6 +810,8 @@
 
    For more information about *flags* you can consult :manpage:`getnameinfo(3)`.
 
+   .. audit-event:: socket.getnameinfo sockaddr
+
 .. function:: getprotobyname(protocolname)
 
    Translate an Internet protocol name (for example, ``'icmp'``) to a constant
@@ -813,6 +827,8 @@
    service.  The optional protocol name, if given, should be ``'tcp'`` or
    ``'udp'``, otherwise any protocol will match.
 
+   .. audit-event:: socket.getservbyname "servicename protocolname"
+
 
 .. function:: getservbyport(port[, protocolname])
 
@@ -820,6 +836,8 @@
    service.  The optional protocol name, if given, should be ``'tcp'`` or
    ``'udp'``, otherwise any protocol will match.
 
+   .. audit-event:: socket.getservbyport "port protocolname"
+
 
 .. function:: ntohl(x)
 
@@ -1003,6 +1021,8 @@
    Set the machine's hostname to *name*.  This will raise an
    :exc:`OSError` if you don't have enough rights.
 
+   .. audit-event:: socket.sethostname name
+
    .. availability:: Unix.
 
    .. versionadded:: 3.3
@@ -1078,6 +1098,7 @@
    Bind the socket to *address*.  The socket must not already be bound. (The format
    of *address* depends on the address family --- see above.)
 
+   .. audit-event:: socket.bind "self address"
 
 .. method:: socket.close()
 
@@ -1115,6 +1136,8 @@
    :exc:`InterruptedError` exception if the connection is interrupted by a
    signal (or the exception raised by the signal handler).
 
+   .. audit-event:: socket.connect "self address"
+
    .. versionchanged:: 3.5
       The method now waits until the connection completes instead of raising an
       :exc:`InterruptedError` exception if the connection is interrupted by a
@@ -1131,6 +1154,7 @@
    :c:data:`errno` variable.  This is useful to support, for example, asynchronous
    connects.
 
+   .. audit-event:: socket.connect "self address"
 
 .. method:: socket.detach()
 
@@ -1472,6 +1496,8 @@
    bytes sent. (The format of *address* depends on the address family --- see
    above.)
 
+   .. audit-event:: socket.sendto "self address"
+
    .. versionchanged:: 3.5
       If the system call is interrupted and the signal handler does not raise
       an exception, the method now retries the system call instead of raising
@@ -1511,6 +1537,8 @@
 
    .. availability:: most Unix platforms, possibly others.
 
+   .. audit-event:: socket.sendmsg "self address"
+
    .. versionadded:: 3.3
 
    .. versionchanged:: 3.5
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 3b754bd..0294f74 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -19,6 +19,30 @@
    .. versionadded:: 3.2
 
 
+.. function:: addaudithook(hook)
+
+   Adds the callable *hook* to the collection of active auditing hooks for the
+   current interpreter.
+
+   When an auditing event is raised through the :func:`sys.audit` function, each
+   hook will be called in the order it was added with the event name and the
+   tuple of arguments. Native hooks added by :c:func:`PySys_AddAuditHook` are
+   called first, followed by hooks added in the current interpreter.
+
+   Calling this function will trigger an event for all existing hooks, and if
+   any raise an exception derived from :class:`Exception`, the add will be
+   silently ignored. As a result, callers cannot assume that their hook has been
+   added unless they control all existing hooks.
+
+   .. versionadded:: 3.8
+
+   .. impl-detail::
+
+      When tracing is enabled, Python hooks are only traced if the callable has
+      a ``__cantrace__`` member that is set to a true value. Otherwise, trace
+      functions will not see the hook.
+
+
 .. data:: argv
 
    The list of command line arguments passed to a Python script. ``argv[0]`` is the
@@ -37,6 +61,30 @@
       ``[os.fsencode(arg) for arg in sys.argv]``.
 
 
+.. _auditing:
+
+.. function:: audit(event, *args)
+
+   .. index:: single: auditing
+
+   Raises an auditing event with any active hooks. The event name is a string
+   identifying the event and its associated schema, which is the number and
+   types of arguments. The schema for a given event is considered public and
+   stable API and should not be modified between releases.
+
+   This function will raise the first exception raised by any hook. In general,
+   these errors should not be handled and should terminate the process as
+   quickly as possible.
+
+   Hooks are added using the :func:`sys.addaudithook` or
+   :c:func:`PySys_AddAuditHook` functions.
+
+   The native equivalent of this function is :c:func:`PySys_Audit`. Using the
+   native function is preferred when possible.
+
+   .. versionadded:: 3.8
+
+
 .. data:: base_exec_prefix
 
    Set during Python startup, before ``site.py`` is run, to the same value as
@@ -114,6 +162,8 @@
 
    This function should be used for internal and specialized purposes only.
 
+   .. audit-event:: sys._current_frames
+
 
 .. function:: breakpointhook()
 
@@ -617,6 +667,8 @@
    that is deeper than the call stack, :exc:`ValueError` is raised.  The default
    for *depth* is zero, returning the frame at the top of the call stack.
 
+   .. audit-event:: sys._getframe
+
    .. impl-detail::
 
       This function should be used for internal and specialized purposes only.
@@ -1146,6 +1198,8 @@
    ``'return'``, ``'c_call'``, ``'c_return'``, or ``'c_exception'``. *arg* depends
    on the event type.
 
+   .. audit-event:: sys.setprofile
+
    The events have the following meaning:
 
    ``'call'``
@@ -1266,6 +1320,8 @@
 
    For more information on code and frame objects, refer to :ref:`types`.
 
+   .. audit-event:: sys.settrace
+
    .. impl-detail::
 
       The :func:`settrace` function is intended only for implementing debuggers,
@@ -1286,6 +1342,13 @@
    first time. The *finalizer* will be called when an asynchronous generator
    is about to be garbage collected.
 
+   .. audit-event:: sys.set_asyncgen_hooks_firstiter
+
+   .. audit-event:: sys.set_asyncgen_hooks_finalizer
+
+   Two auditing events are raised because the underlying API consists of two
+   calls, each of which must raise its own event.
+
    .. versionadded:: 3.6
       See :pep:`525` for more details, and for a reference example of a
       *finalizer* method see the implementation of
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 14fa27b..1895ae7 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -95,6 +95,12 @@
    parameter to ``urllib.urlopen``, can be obtained by using
    :class:`ProxyHandler` objects.
 
+   .. audit-event:: urllib.request "fullurl data headers method"
+
+      The default opener raises an :func:`auditing event <sys.audit>`
+      ``urllib.request`` with arguments ``fullurl``, ``data``, ``headers``,
+      ``method`` taken from the request object.
+
    .. versionchanged:: 3.2
       *cafile* and *capath* were added.
 
@@ -118,6 +124,7 @@
        :func:`ssl.create_default_context` select the system's trusted CA
        certificates for you.
 
+
 .. function:: install_opener(opener)
 
    Install an :class:`OpenerDirector` instance as the default global opener.
diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py
index e097c13..f79b250 100644
--- a/Doc/tools/extensions/pyspecific.py
+++ b/Doc/tools/extensions/pyspecific.py
@@ -151,6 +151,45 @@
         return [pnode]
 
 
+# Support for documenting audit events
+
+class AuditEvent(Directive):  # directive that renders an "auditing event" sentence
+
+    has_content = True  # an optional body, when given, is parsed in place of the generated sentence
+    required_arguments = 1  # the event name
+    optional_arguments = 1  # space-separated names of the event's arguments, optionally quoted
+    final_argument_whitespace = True  # lets the argument-name list contain spaces
+
+    _label = [  # sentence templates, indexed by min(2, number of event arguments)
+        "Raises an :ref:`auditing event <auditing>` {name} with no arguments.",
+        "Raises an :ref:`auditing event <auditing>` {name} with argument {args}.",
+        "Raises an :ref:`auditing event <auditing>` {name} with arguments {args}.",
+    ]
+
+    def run(self):  # returns a one-element list holding the generated paragraph node
+        if len(self.arguments) >= 2 and self.arguments[1]:
+            args = [  # each argument name wrapped in an inline literal (double backquotes)
+                "``{}``".format(a.strip())
+                for a in self.arguments[1].strip("'\"").split()  # drop surrounding quotes, split on whitespace
+                if a.strip()
+            ]
+        else:
+            args = []  # directive was given only the event name
+
+        label = translators['sphinx'].gettext(self._label[min(2, len(args))])  # index 0, 1, or 2 (two or more args)
+        text = label.format(name="``{}``".format(self.arguments[0]),
+                            args=", ".join(args))
+
+        pnode = nodes.paragraph(text, classes=["audit-hook"])
+        if self.content:  # explicit body supplied: parse it as the paragraph's children
+            self.state.nested_parse(self.content, self.content_offset, pnode)
+        else:  # no body: render the generated sentence as inline markup
+            n, m = self.state.inline_text(text, self.lineno)
+            pnode.extend(n + m)
+
+        return [pnode]
+
+
 # Support for documenting decorators
 
 class PyDecoratorMixin(object):
@@ -424,6 +463,7 @@
     app.add_role('source', source_role)
     app.add_directive('impl-detail', ImplementationDetail)
     app.add_directive('availability', Availability)
+    app.add_directive('audit-event', AuditEvent)
     app.add_directive('deprecated-removed', DeprecatedRemoved)
     app.add_builder(PydocTopicsBuilder)
     app.add_builder(suspicious.CheckSuspiciousMarkupBuilder)