diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 7638798ca2552f..a4695547921faa 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -801,7 +801,8 @@ Only actions that consume command-line arguments (e.g. ``'store'``, The recommended way to create a custom action is to extend :class:`Action`, overriding the :meth:`!__call__` method and optionally the :meth:`!__init__` and -:meth:`!format_usage` methods. +:meth:`!format_usage` methods. You can also register custom actions using the +:meth:`~ArgumentParser.register` method and reference them by their registered name. An example of a custom action:: @@ -1020,10 +1021,11 @@ necessary type-checking and type conversions to be performed. If the type_ keyword is used with the default_ keyword, the type converter is only applied if the default is a string. -The argument to ``type`` can be any callable that accepts a single string. +The argument to ``type`` can be a callable that accepts a single string or +the name of a registered type (see :meth:`~ArgumentParser.register`). If the function raises :exc:`ArgumentTypeError`, :exc:`TypeError`, or :exc:`ValueError`, the exception is caught and a nicely formatted error -message is displayed. No other exception types are handled. +message is displayed. Other exception types are not handled. Common built-in types and functions can be used as type converters: @@ -2163,6 +2165,34 @@ Intermixed parsing .. versionadded:: 3.7 +Registering custom types or actions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. method:: ArgumentParser.register(registry_name, value, object) + + Sometimes it's desirable to use a custom string in error messages to provide + more user-friendly output. In these cases, :meth:`!register` can be used to + register custom actions or types with a parser and allow you to reference the + type by its registered name instead of its callable name. 
+ + The :meth:`!register` method accepts three arguments - a *registry_name*, + specifying the internal registry where the object will be stored (e.g., + ``action``, ``type``), *value*, which is the key under which the object will + be registered, and object, the callable to be registered. + + The following example shows how to register a custom type with a parser:: + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.register('type', 'hexadecimal integer', lambda s: int(s, 16)) + >>> parser.add_argument('--foo', type='hexadecimal integer') + _StoreAction(option_strings=['--foo'], dest='foo', nargs=None, const=None, default=None, type='hexadecimal integer', choices=None, required=False, help=None, metavar=None, deprecated=False) + >>> parser.parse_args(['--foo', '0xFA']) + Namespace(foo=250) + >>> parser.parse_args(['--foo', '1.2']) + usage: PROG [-h] [--foo FOO] + PROG: error: argument --foo: invalid 'hexadecimal integer' value: '1.2' + Exceptions ---------- diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 8ed75d9b7560b9..f490f7563b58a5 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1413,13 +1413,15 @@ way is to instantiate one of the following classes: .. class:: OleDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return the windows specific :class:`HRESULT` code. :class:`HRESULT` values contain information specifying whether the function call failed or succeeded, together with additional error code. If the return value signals a failure, an :class:`OSError` is automatically raised. + .. availability:: Windows + .. versionchanged:: 3.3 :exc:`WindowsError` used to be raised, which is now an alias of :exc:`OSError`. 
@@ -1431,14 +1433,17 @@ way is to instantiate one of the following classes: .. class:: WinDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None) - Windows only: Instances of this class represent loaded shared libraries, + Instances of this class represent loaded shared libraries, functions in these libraries use the ``stdcall`` calling convention, and are assumed to return :c:expr:`int` by default. + .. availability:: Windows + .. versionchanged:: 3.12 The *name* parameter can now be a :term:`path-like object`. + The Python :term:`global interpreter lock` is released before calling any function exported by these libraries, and reacquired afterwards. @@ -1574,13 +1579,17 @@ These prefabricated library loaders are available: .. data:: windll :noindex: - Windows only: Creates :class:`WinDLL` instances. + Creates :class:`WinDLL` instances. + + .. availability:: Windows .. data:: oledll :noindex: - Windows only: Creates :class:`OleDLL` instances. + Creates :class:`OleDLL` instances. + + .. availability:: Windows .. data:: pydll @@ -1746,11 +1755,13 @@ See :ref:`ctypes-callback-functions` for examples. .. function:: WINFUNCTYPE(restype, *argtypes, use_errno=False, use_last_error=False) - Windows only: The returned function prototype creates functions that use the + The returned function prototype creates functions that use the ``stdcall`` calling convention. The function will release the GIL during the call. *use_errno* and *use_last_error* have the same meaning as above. + .. availability:: Windows + .. function:: PYFUNCTYPE(restype, *argtypes) @@ -1981,17 +1992,21 @@ Utility functions .. function:: DllCanUnloadNow() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllCanUnloadNow function that the _ctypes extension dll exports. + .. availability:: Windows + .. 
function:: DllGetClassObject() - Windows only: This function is a hook which allows implementing in-process + This function is a hook which allows implementing in-process COM servers with ctypes. It is called from the DllGetClassObject function that the ``_ctypes`` extension dll exports. + .. availability:: Windows + .. function:: find_library(name) :module: ctypes.util @@ -2007,7 +2022,7 @@ Utility functions .. function:: find_msvcrt() :module: ctypes.util - Windows only: return the filename of the VC runtime library used by Python, + Returns the filename of the VC runtime library used by Python, and by the extension modules. If the name of the library cannot be determined, ``None`` is returned. @@ -2015,20 +2030,27 @@ Utility functions with a call to the ``free(void *)``, it is important that you use the function in the same library that allocated the memory. + .. availability:: Windows + .. function:: FormatError([code]) - Windows only: Returns a textual description of the error code *code*. If no + Returns a textual description of the error code *code*. If no error code is specified, the last error code is used by calling the Windows api function GetLastError. + .. availability:: Windows + .. function:: GetLastError() - Windows only: Returns the last error code set by Windows in the calling thread. + Returns the last error code set by Windows in the calling thread. This function calls the Windows ``GetLastError()`` function directly, it does not return the ctypes-private copy of the error code. + .. availability:: Windows + + .. function:: get_errno() Returns the current value of the ctypes-private copy of the system @@ -2038,11 +2060,14 @@ Utility functions .. function:: get_last_error() - Windows only: returns the current value of the ctypes-private copy of the system + Returns the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread. + .. availability:: Windows + .. 
audit-event:: ctypes.get_last_error "" ctypes.get_last_error + .. function:: memmove(dst, src, count) Same as the standard C memmove library function: copies *count* bytes from @@ -2091,10 +2116,12 @@ Utility functions .. function:: set_last_error(value) - Windows only: set the current value of the ctypes-private copy of the system + Sets the current value of the ctypes-private copy of the system :data:`!LastError` variable in the calling thread to *value* and return the previous value. + .. availability:: Windows + .. audit-event:: ctypes.set_last_error error ctypes.set_last_error @@ -2115,12 +2142,14 @@ Utility functions .. function:: WinError(code=None, descr=None) - Windows only: this function is probably the worst-named thing in ctypes. It + This function is probably the worst-named thing in ctypes. It creates an instance of :exc:`OSError`. If *code* is not specified, ``GetLastError`` is called to determine the error code. If *descr* is not specified, :func:`FormatError` is called to get a textual description of the error. + .. availability:: Windows + .. versionchanged:: 3.3 An instance of :exc:`WindowsError` used to be created, which is now an alias of :exc:`OSError`. @@ -2484,9 +2513,11 @@ These are the fundamental ctypes data types: .. class:: HRESULT - Windows only: Represents a :c:type:`!HRESULT` value, which contains success or + Represents a :c:type:`!HRESULT` value, which contains success or error information for a function or method call. + .. availability:: Windows + .. class:: py_object @@ -2755,7 +2786,7 @@ Exceptions .. exception:: COMError(hresult, text, details) - Windows only: This exception is raised when a COM method call failed. + This exception is raised when a COM method call failed. .. attribute:: hresult @@ -2775,4 +2806,6 @@ Exceptions identifier. *progid* is the ``ProgID`` of the interface that defined the error. + .. availability:: Windows + .. 
versionadded:: next diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index cdd58b84a995b7..e0831bf7e65ad2 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -152,6 +152,11 @@ The :mod:`urllib.request` module defines the following functions: the path component of a URL. This does not produce a complete URL. The return value will already be quoted using the :func:`~urllib.parse.quote` function. + .. versionchanged:: 3.14 + On Windows, ``:`` characters not following a drive letter are quoted. In + previous versions, :exc:`OSError` was raised if a colon character was + found in any position other than the second character. + .. function:: url2pathname(path) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 370f1d259abe0f..3899d4269233a1 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -131,7 +131,8 @@ typedef struct { \ /* redundant values (derived from co_localsplusnames and \ co_localspluskinds) */ \ - int co_nlocalsplus; /* number of local + cell + free variables */ \ + int co_nlocalsplus; /* number of spaces for holding local, cell, \ + and free variables */ \ int co_framesize; /* Size of frame in words */ \ int co_nlocals; /* number of local variables */ \ int co_ncellvars; /* total number of cell variables */ \ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index f7d747cc6c62a1..6e4a308226f3fe 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -90,6 +90,17 @@ extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); extern uint32_t _PyDictKeys_GetVersionForCurrentState( PyInterpreterState *interp, PyDictKeysObject *dictkeys); +/* Gets a version number unique to the current state of the keys of dict, if possible. + * + * In free-threaded builds ensures that the dict can be used for lock-free + * reads if a version was assigned. + * + * The caller must hold the per-object lock on dict. 
+ * + * Returns the version number, or zero if it was not possible to get a version number. */ +extern uint32_t _PyDict_GetKeysVersionForCurrentState( + PyInterpreterState *interp, PyDictObject *dict); + extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); extern void _PyDictKeys_DecRef(PyDictKeysObject *keys); diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 38a1c56c09d9db..479fe10d00066d 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { # define _PyGC_BITS_UNREACHABLE (4) # define _PyGC_BITS_FROZEN (8) # define _PyGC_BITS_SHARED (16) -# define _PyGC_BITS_SHARED_INLINE (32) # define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting #endif @@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { } #define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op)) -/* True if the memory of the object is shared between multiple - * threads and needs special purpose when freeing due to - * the possibility of in-flight lock-free reads occurring. - * Objects with this bit that are GC objects will automatically - * delay-freed by PyObject_GC_Del. 
*/ -static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) { - return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); -} -#define _PyObject_GC_IS_SHARED_INLINE(op) \ - _PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op)) - -static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) { - _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); -} -#define _PyObject_GC_SET_SHARED_INLINE(op) \ - _PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op)) - #endif /* Bit flags for _gc_prev */ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e4f0138e17edfa..c12e242d560bde 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -982,6 +982,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hi)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hook)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hour)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(id)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index e70f11e2a26cd5..dfd9f2b799ec8e 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -471,6 +471,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(hi) STRUCT_FOR_ID(hook) STRUCT_FOR_ID(hour) + STRUCT_FOR_ID(id) STRUCT_FOR_ID(ident) STRUCT_FOR_ID(identity_hint) STRUCT_FOR_ID(ignore) diff --git a/Include/internal/pycore_modsupport.h b/Include/internal/pycore_modsupport.h index c661f1d82a84f6..614e9f93751834 100644 --- a/Include/internal/pycore_modsupport.h +++ b/Include/internal/pycore_modsupport.h @@ -1,8 +1,6 @@ #ifndef Py_INTERNAL_MODSUPPORT_H #define 
Py_INTERNAL_MODSUPPORT_H -#include "pycore_lock.h" // _PyOnceFlag - #ifdef __cplusplus extern "C" { #endif diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index cafc02f892499c..783d88cb51ffbd 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_stackref.h" #include "pycore_uniqueid.h" // _PyObject_ThreadIncrefSlow() // This value is added to `ob_ref_shared` for objects that use deferred @@ -595,6 +596,20 @@ _Py_TryIncrefCompare(PyObject **src, PyObject *op) return 1; } +static inline int +_Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) +{ + if (_Py_IsImmortal(op) || _PyObject_HasDeferredRefcount(op)) { + *out = (_PyStackRef){ .bits = (intptr_t)op | Py_TAG_DEFERRED }; + return 1; + } + if (_Py_TryIncrefCompare(src, op)) { + *out = PyStackRef_FromPyObjectSteal(op); + return 1; + } + return 0; +} + /* Loads and increfs an object from ptr, which may contain a NULL value. Safe with concurrent (atomic) updates to ptr. NOTE: The writer must set maybe-weakref on the stored object! */ diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index dd6b0762370c92..5bb34001aab1b4 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void); extern void _PyMem_FreeDelayed(void *ptr); // Enqueue an object to be freed possibly after some delay -extern void _PyObject_FreeDelayed(void *ptr); +#ifdef Py_GIL_DISABLED +extern void _PyObject_XDecRefDelayed(PyObject *obj); +#else +static inline void _PyObject_XDecRefDelayed(PyObject *obj) +{ + Py_XDECREF(obj); +} +#endif // Periodically process delayed free requests. 
extern void _PyMem_ProcessDelayed(PyThreadState *tstate); + +// Periodically process delayed free requests when the world is stopped. +// Notify of any objects which should be freed. +typedef void (*delayed_dealloc_cb)(PyObject *, void *); +extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, + delayed_dealloc_cb cb, void *state); + // Abandon all thread-local delayed free requests and push them to the // interpreter's queue. extern void _PyMem_AbandonDelayed(PyThreadState *tstate); diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index f4fbf3734e2d44..54d8803bc0bdb6 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -269,6 +269,15 @@ extern int _PyOS_InterruptOccurred(PyThreadState *tstate); #define HEAD_UNLOCK(runtime) \ PyMutex_Unlock(&(runtime)->interpreters.mutex) +#define _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) \ + for (PyThreadState *t = interp->threads.head; t; t = t->next) +#define _Py_FOR_EACH_TSTATE_BEGIN(interp, t) \ + HEAD_LOCK(interp->runtime); \ + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) +#define _Py_FOR_EACH_TSTATE_END(interp) \ + HEAD_UNLOCK(interp->runtime) + + // Get the configuration of the current interpreter. // The caller must hold the GIL. // Export for test_peg_generator. 
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 5d404c8fd91ca6..b631382cae058a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -980,6 +980,7 @@ extern "C" { INIT_ID(hi), \ INIT_ID(hook), \ INIT_ID(hour), \ + INIT_ID(id), \ INIT_ID(ident), \ INIT_ID(identity_hint), \ INIT_ID(ignore), \ diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 5debdd68fe94ca..7b39d07f976ee3 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -269,6 +269,16 @@ extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); PyTypeObject *_PyType_LookupByVersion(unsigned int version); +// Function pointer type for user-defined validation function that will be +// called by _PyType_Validate(). +// It should return 0 if the validation is passed, otherwise it will return -1. +typedef int (*_py_validate_type)(PyTypeObject *); + +// It will verify the ``ty`` through user-defined validation function ``validate``, +// and if the validation is passed, it will set the ``tp_version`` as valid +// tp_version_tag from the ``ty``. 
+extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index d0bc8d7186c053..24cec3a4fded7a 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1680,6 +1680,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(id); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ident); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 61cba1ac4932d0..753238354f6d36 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -23,11 +23,6 @@ read_mime_types(file) -- parse one file, return a dictionary or None """ -import os -import sys -import posixpath -import urllib.parse - try: from _winapi import _mimetypes_read_windows_registry except ImportError: @@ -119,6 +114,10 @@ def guess_type(self, url, strict=True): Optional 'strict' argument when False adds a bunch of commonly found, but non-standard types. """ + # Lazy import to improve module import time + import os + import urllib.parse + # TODO: Deprecate accepting file paths (in particular path-like objects). 
url = os.fspath(url) p = urllib.parse.urlparse(url) @@ -146,6 +145,10 @@ def guess_type(self, url, strict=True): if '=' in type or '/' not in type: type = 'text/plain' return type, None # never compressed, so encoding is None + + # Lazy import to improve module import time + import posixpath + return self._guess_file_type(url, strict, posixpath.splitext) def guess_file_type(self, path, *, strict=True): @@ -153,6 +156,9 @@ def guess_file_type(self, path, *, strict=True): Similar to guess_type(), but takes file path instead of URL. """ + # Lazy import to improve module import time + import os + path = os.fsdecode(path) path = os.path.splitdrive(path)[1] return self._guess_file_type(path, strict, os.path.splitext) @@ -399,6 +405,9 @@ def init(files=None): else: db = _db + # Lazy import to improve module import time + import os + for file in files: if os.path.isfile(file): db.read(file) @@ -445,7 +454,7 @@ def _default_mime_types(): } # Before adding new types, make sure they are either registered with IANA, - # at http://www.iana.org/assignments/media-types + # at https://www.iana.org/assignments/media-types/media-types.xhtml # or extensions, i.e. using the x- prefix # If you add to these, please keep them sorted by mime type. @@ -646,6 +655,7 @@ def _default_mime_types(): def _main(): import getopt + import sys USAGE = """\ Usage: mimetypes.py [options] type diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 255eb2f547c2ce..66092e4821a0ec 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -15,14 +15,20 @@ def url2pathname(url): # become # C:\foo\bar\spam.foo import string, urllib.parse + if url[:3] == '///': + # URL has an empty authority section, so the path begins on the third + # character. + url = url[2:] + elif url[:12] == '//localhost/': + # Skip past 'localhost' authority. + url = url[11:] + if url[:3] == '///': + # Skip past extra slash before UNC drive in URL path. + url = url[1:] # Windows itself uses ":" even in URLs. 
url = url.replace(':', '|') if not '|' in url: # No drive specifier, just convert slashes - if url[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - url = url[2:] # make sure not to convert quoted slashes :-) return urllib.parse.unquote(url.replace('/', '\\')) comp = url.split('|') @@ -40,6 +46,7 @@ def pathname2url(p): # C:\foo\bar\spam.foo # becomes # ///C:/foo/bar/spam.foo + import ntpath import urllib.parse # First, clean up some special forms. We are going to sacrifice # the additional information anyway @@ -48,16 +55,13 @@ def pathname2url(p): p = p[4:] if p[:4].upper() == 'UNC/': p = '//' + p[4:] - elif p[1:2] != ':': - raise OSError('Bad path: ' + p) - if not ':' in p: - # No DOS drive specified, just quote the pathname - return urllib.parse.quote(p) - comp = p.split(':', maxsplit=2) - if len(comp) != 2 or len(comp[0]) > 1: - error = 'Bad path: ' + p - raise OSError(error) + drive, tail = ntpath.splitdrive(p) + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. 
+ drive = f'///{drive.upper()}' - drive = urllib.parse.quote(comp[0].upper()) - tail = urllib.parse.quote(comp[1]) - return '///' + drive + ':' + tail + drive = urllib.parse.quote(drive, safe='/:') + tail = urllib.parse.quote(tail) + return drive + tail diff --git a/Lib/test/test_free_threading/test_code.py b/Lib/test/test_free_threading/test_code.py new file mode 100644 index 00000000000000..a5136a3ba4edc7 --- /dev/null +++ b/Lib/test/test_free_threading/test_code.py @@ -0,0 +1,30 @@ +import unittest + +from threading import Thread +from unittest import TestCase + +from test.support import threading_helper + +@threading_helper.requires_working_threading() +class TestCode(TestCase): + def test_code_attrs(self): + """Test concurrent accesses to lazily initialized code attributes""" + code_objects = [] + for _ in range(1000): + code_objects.append(compile("a + b", "", "eval")) + + def run_in_thread(): + for code in code_objects: + self.assertIsInstance(code.co_code, bytes) + self.assertIsInstance(code.co_freevars, tuple) + self.assertIsInstance(code.co_varnames, tuple) + + threads = [Thread(target=run_in_thread) for _ in range(2)] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index 80daf0d9cae9e0..13717cb39fa35d 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -142,6 +142,70 @@ def writer_func(l): for ref in thread_list: self.assertIsNone(ref()) + def test_racing_set_object_dict(self): + """Races assigning to __dict__ should be thread safe""" + class C: pass + class MyDict(dict): pass + for cyclic in (False, True): + f = C() + f.__dict__ = {"foo": 42} + THREAD_COUNT = 10 + + def writer_func(l): + for i in range(1000): + if cyclic: + other_d = {} + d = MyDict({"foo": 100}) + if cyclic: + d["x"] = other_d + other_d["bar"] = d + 
l.append(weakref.ref(d)) + f.__dict__ = d + + def reader_func(): + for i in range(1000): + f.foo + + lists = [] + readers = [] + writers = [] + for x in range(THREAD_COUNT): + thread_list = [] + lists.append(thread_list) + writer = Thread(target=partial(writer_func, thread_list)) + writers.append(writer) + + for x in range(THREAD_COUNT): + reader = Thread(target=partial(reader_func)) + readers.append(reader) + + for writer in writers: + writer.start() + for reader in readers: + reader.start() + + for writer in writers: + writer.join() + + for reader in readers: + reader.join() + + f.__dict__ = {} + gc.collect() + gc.collect() + + count = 0 + ids = set() + for thread_list in lists: + for i, ref in enumerate(thread_list): + if ref() is None: + continue + count += 1 + ids.add(id(ref())) + count += 1 + + self.assertEqual(count, 0) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index bf2cb1160723b0..2ea6dba12effc1 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -758,7 +758,8 @@ def check_stack_names(self, frame, expected): while frame: name = frame.f_code.co_name # Stop checking frames when we get to our test helper. 
- if name.startswith('check_') or name.startswith('call_'): + if (name.startswith('check_') or name.startswith('call_') + or name.startswith('test')): break names.append(name) @@ -799,6 +800,25 @@ def call_throw(gen): self.check_yield_from_example(call_throw) + def test_throw_with_yield_from_custom_generator(self): + + class CustomGen: + def __init__(self, test): + self.test = test + def throw(self, *args): + self.test.check_stack_names(sys._getframe(), ['throw', 'g']) + def __iter__(self): + return self + def __next__(self): + return 42 + + def g(target): + yield from target + + gen = g(CustomGen(self)) + gen.send(None) + gen.throw(RuntimeError) + class YieldFromTests(unittest.TestCase): def test_generator_gi_yieldfrom(self): diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 4f59184dfcfdc7..6715071af8c752 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -347,13 +347,18 @@ def test_normpath(self): tester("ntpath.normpath('..')", r'..') tester("ntpath.normpath('.')", r'.') + tester("ntpath.normpath('c:.')", 'c:') tester("ntpath.normpath('')", r'.') tester("ntpath.normpath('/')", '\\') tester("ntpath.normpath('c:/')", 'c:\\') tester("ntpath.normpath('/../.././..')", '\\') tester("ntpath.normpath('c:/../../..')", 'c:\\') + tester("ntpath.normpath('/./a/b')", r'\a\b') + tester("ntpath.normpath('c:/./a/b')", r'c:\a\b') tester("ntpath.normpath('../.././..')", r'..\..\..') tester("ntpath.normpath('K:../.././..')", r'K:..\..\..') + tester("ntpath.normpath('./a/b')", r'a\b') + tester("ntpath.normpath('c:./a/b')", r'c:a\b') tester("ntpath.normpath('C:////a/b')", r'C:\a\b') tester("ntpath.normpath('//machine/share//a/b')", r'\\machine\share\a\b') diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 78e4bf44f7ea0c..a0292b31af1be5 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -546,7 +546,6 @@ def count_args(self, *args): @threading_helper.requires_working_threading() -@requires_specialization 
class TestRacesDoNotCrash(TestBase): # Careful with these. Bigger numbers have a higher chance of catching bugs, # but you can also burn through a *ton* of type/dict/function versions: @@ -588,6 +587,7 @@ def assert_races_do_not_crash( for writer in writers: writer.join() + @requires_specialization def test_binary_subscr_getitem(self): def get_items(): class C: @@ -617,6 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -640,6 +641,7 @@ def write(items): opname = "BINARY_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_gen(self): def get_items(): def g(): @@ -671,6 +673,7 @@ def write(items): opname = "FOR_ITER_GEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_list(self): def get_items(): items = [] @@ -692,6 +695,7 @@ def write(items): opname = "FOR_ITER_LIST" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_class(self): def get_items(): class C: @@ -721,6 +725,7 @@ def write(items): opname = "LOAD_ATTR_CLASS" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_getattribute_overridden(self): def get_items(): class C: @@ -750,6 +755,7 @@ def write(items): opname = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_instance_value(self): def get_items(): class C: @@ -773,6 +779,7 @@ def write(items): opname = "LOAD_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_lazy_dict(self): def get_items(): class C(Exception): @@ -802,6 +809,7 @@ def write(items): opname = 
"LOAD_ATTR_METHOD_LAZY_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_no_dict(self): def get_items(): class C: @@ -832,6 +840,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_NO_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_with_values(self): def get_items(): class C: @@ -861,6 +870,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_WITH_VALUES" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_module(self): def get_items(): items = [] @@ -885,6 +895,7 @@ def write(items): opname = "LOAD_ATTR_MODULE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_property(self): def get_items(): class C: @@ -914,6 +925,7 @@ def write(items): opname = "LOAD_ATTR_PROPERTY" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_with_hint(self): def get_items(): class C: @@ -940,6 +952,7 @@ def write(items): opname = "LOAD_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization_ft def test_load_global_module(self): def get_items(): items = [] @@ -961,6 +974,7 @@ def write(items): opname, get_items, read, write, check_items=True ) + @requires_specialization def test_store_attr_instance_value(self): def get_items(): class C: @@ -983,6 +997,7 @@ def write(items): opname = "STORE_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_attr_with_hint(self): def get_items(): class C: @@ -1008,6 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1031,6 +1047,7 @@ def write(items): opname 
= "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_unpack_sequence_list(self): def get_items(): items = [] @@ -1255,6 +1272,72 @@ def g(): self.assert_specialized(g, "CONTAINS_OP_SET") self.assert_no_opcode(g, "CONTAINS_OP") + @cpython_only + @requires_specialization_ft + def test_to_bool(self): + def to_bool_bool(): + true_cnt, false_cnt = 0, 0 + elems = [e % 2 == 0 for e in range(100)] + for e in elems: + if e: + true_cnt += 1 + else: + false_cnt += 1 + self.assertEqual(true_cnt, 50) + self.assertEqual(false_cnt, 50) + + to_bool_bool() + self.assert_specialized(to_bool_bool, "TO_BOOL_BOOL") + self.assert_no_opcode(to_bool_bool, "TO_BOOL") + + def to_bool_int(): + count = 0 + for i in range(100): + if i: + count += 1 + else: + count -= 1 + self.assertEqual(count, 98) + + to_bool_int() + self.assert_specialized(to_bool_int, "TO_BOOL_INT") + self.assert_no_opcode(to_bool_int, "TO_BOOL") + + def to_bool_list(): + count = 0 + elems = [1, 2, 3] + while elems: + count += elems.pop() + self.assertEqual(elems, []) + self.assertEqual(count, 6) + + to_bool_list() + self.assert_specialized(to_bool_list, "TO_BOOL_LIST") + self.assert_no_opcode(to_bool_list, "TO_BOOL") + + def to_bool_none(): + count = 0 + elems = [None, None, None, None] + for e in elems: + if not e: + count += 1 + self.assertEqual(count, len(elems)) + + to_bool_none() + self.assert_specialized(to_bool_none, "TO_BOOL_NONE") + self.assert_no_opcode(to_bool_none, "TO_BOOL") + + def to_bool_str(): + count = 0 + elems = ["", "foo", ""] + for e in elems: + if e: + count += 1 + self.assertEqual(count, 1) + + to_bool_str() + self.assert_specialized(to_bool_str, "TO_BOOL_STR") + self.assert_no_opcode(to_bool_str, "TO_BOOL") if __name__ == "__main__": diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 467da78f9e1779..99515dfc71f9ba 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -4177,9 +4177,9 @@ def 
test_eventfd_select(self): @support.requires_linux_version(2, 6, 30) class TimerfdTests(unittest.TestCase): # 1 ms accuracy is reliably achievable on every platform except Android - # emulators, where we allow 10 ms (gh-108277). + # emulators, where we allow 100 ms (gh-124873). if sys.platform == "android" and platform.android_ver().is_emulator: - CLOCK_RES_PLACES = 2 + CLOCK_RES_PLACES = 1 else: CLOCK_RES_PLACES = 3 diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index b39255ebc79ac1..43e4fbc610e5f7 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -379,6 +379,7 @@ def test_expanduser_pwd2(self): ("/.", "/"), ("/./", "/"), ("/.//.", "/"), + ("/./foo/bar", "/foo/bar"), ("/foo", "/foo"), ("/foo/bar", "/foo/bar"), ("//", "//"), @@ -388,6 +389,7 @@ def test_expanduser_pwd2(self): ("///..//./foo/.//bar", "/foo/bar"), (".", "."), (".//.", "."), + ("./foo/bar", "foo/bar"), ("..", ".."), ("../", ".."), ("../foo", "../foo"), diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index c66b1c49c316e6..a204ef41c3ce90 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1429,8 +1429,9 @@ def test_pathname2url_win(self): self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c') self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9') self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo") - # Long drive letter - self.assertRaises(IOError, fn, "XX:\\") + # NTFS alternate data streams + self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar') + self.assertEqual(fn('foo:bar'), 'foo%3Abar') # No drive letter self.assertEqual(fn("\\folder\\test\\"), '/folder/test/') self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/') @@ -1491,10 +1492,12 @@ def test_url2pathname_win(self): # UNC paths self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') - self.assertEqual(fn('/////server/path/to/file'), 
'\\\\\\server\\path\\to\\file') + self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file') # Localhost paths self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file') + self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file') + self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file') # Percent-encoded forward slashes are preserved for backwards compatibility self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar') self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar') @@ -1513,7 +1516,7 @@ def test_url2pathname_posix(self): self.assertEqual(fn('//foo/bar'), '//foo/bar') self.assertEqual(fn('///foo/bar'), '/foo/bar') self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_url2pathname_nonascii(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bcfdcc51fac369..80be65c613e971 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1657,6 +1657,9 @@ def url2pathname(pathname): # URL has an empty authority section, so the path begins on the # third character. pathname = pathname[2:] + elif pathname[:12] == '//localhost/': + # Skip past 'localhost' authority. 
+ pathname = pathname[11:] encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return unquote(pathname, encoding=encoding, errors=errors) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst new file mode 100644 index 00000000000000..8f2f37146d3c13 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-21-50-23.gh-issue-124470.pFr3_d.rst @@ -0,0 +1 @@ +Fix crash in free-threaded builds when replacing object dictionary while reading attribute on another thread diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-07-21-48-23.gh-issue-126091.ETaRGE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-07-21-48-23.gh-issue-126091.ETaRGE.rst new file mode 100644 index 00000000000000..08118ff1af657d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-07-21-48-23.gh-issue-126091.ETaRGE.rst @@ -0,0 +1,2 @@ +Ensure stack traces are complete when throwing into a generator chain that +ends in a custom generator. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-21-49-58.gh-issue-127020.5vvI17.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-21-49-58.gh-issue-127020.5vvI17.rst new file mode 100644 index 00000000000000..a8fd9272f5a923 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-21-49-58.gh-issue-127020.5vvI17.rst @@ -0,0 +1,4 @@ +Fix a crash in the free threading build when :c:func:`PyCode_GetCode`, +:c:func:`PyCode_GetVarnames`, :c:func:`PyCode_GetCellvars`, or +:c:func:`PyCode_GetFreevars` were called from multiple threads at the same +time. 
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst new file mode 100644 index 00000000000000..11e2b7350a0e48 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst @@ -0,0 +1,3 @@ +Fix issue where :func:`urllib.request.pathname2url` raised :exc:`OSError` +when given a Windows path containing a colon character not following a +drive letter, such as before an NTFS alternate data stream. diff --git a/Misc/NEWS.d/next/Library/2024-11-13-19-15-18.gh-issue-126780.ZZqJvI.rst b/Misc/NEWS.d/next/Library/2024-11-13-19-15-18.gh-issue-126780.ZZqJvI.rst new file mode 100644 index 00000000000000..93d45caf5cad72 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-13-19-15-18.gh-issue-126780.ZZqJvI.rst @@ -0,0 +1 @@ +Fix :func:`os.path.normpath` for drive-relative paths on Windows. diff --git a/Misc/NEWS.d/next/Library/2024-11-18-22-02-47.gh-issue-118761.GQKD_J.rst b/Misc/NEWS.d/next/Library/2024-11-18-22-02-47.gh-issue-118761.GQKD_J.rst new file mode 100644 index 00000000000000..ebb9fe8016de21 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-18-22-02-47.gh-issue-118761.GQKD_J.rst @@ -0,0 +1,2 @@ +Improve import time of :mod:`mimetypes` by around 11-16 times. Patch by Hugo +van Kemenade. diff --git a/Misc/NEWS.d/next/Library/2024-11-20-11-37-08.gh-issue-126316.ElkZmE.rst b/Misc/NEWS.d/next/Library/2024-11-20-11-37-08.gh-issue-126316.ElkZmE.rst new file mode 100644 index 00000000000000..d643254c5b3564 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-20-11-37-08.gh-issue-126316.ElkZmE.rst @@ -0,0 +1,2 @@ +:mod:`grp`: Make :func:`grp.getgrall` thread-safe by adding a mutex. Patch +by Victor Stinner. 
diff --git a/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst new file mode 100644 index 00000000000000..998c99bf4358d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard any +'localhost' authority present in the URL. diff --git a/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst new file mode 100644 index 00000000000000..a84c06f3c7a273 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-22-03-40-02.gh-issue-127078.gI_PaP.rst @@ -0,0 +1,2 @@ +Fix issue where :func:`urllib.request.url2pathname` failed to discard an +extra slash before a UNC drive in the URL path on Windows. diff --git a/Modules/clinic/grpmodule.c.h b/Modules/clinic/grpmodule.c.h index cc0ad210f42743..facfa3a43e490e 100644 --- a/Modules/clinic/grpmodule.c.h +++ b/Modules/clinic/grpmodule.c.h @@ -2,6 +2,12 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + PyDoc_STRVAR(grp_getgrgid__doc__, "getgrgid($module, /, id)\n" "--\n" @@ -11,21 +17,49 @@ PyDoc_STRVAR(grp_getgrgid__doc__, "If id is not valid, raise KeyError."); #define GRP_GETGRGID_METHODDEF \ - {"getgrgid", (PyCFunction)(void(*)(void))grp_getgrgid, METH_VARARGS|METH_KEYWORDS, grp_getgrgid__doc__}, + {"getgrgid", _PyCFunction_CAST(grp_getgrgid), METH_FASTCALL|METH_KEYWORDS, grp_getgrgid__doc__}, static PyObject * grp_getgrgid_impl(PyObject *module, PyObject *id); static PyObject * -grp_getgrgid(PyObject *module, PyObject *args, PyObject *kwargs) +grp_getgrgid(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject 
*kwnames) { PyObject *return_value = NULL; - static char *_keywords[] = {"id", NULL}; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(id), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"id", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "getgrgid", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; PyObject *id; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O:getgrgid", _keywords, - &id)) + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { goto exit; + } + id = args[0]; return_value = grp_getgrgid_impl(module, id); exit: @@ -41,21 +75,53 @@ PyDoc_STRVAR(grp_getgrnam__doc__, "If name is not valid, raise KeyError."); #define GRP_GETGRNAM_METHODDEF \ - {"getgrnam", (PyCFunction)(void(*)(void))grp_getgrnam, METH_VARARGS|METH_KEYWORDS, grp_getgrnam__doc__}, + {"getgrnam", _PyCFunction_CAST(grp_getgrnam), METH_FASTCALL|METH_KEYWORDS, grp_getgrnam__doc__}, static PyObject * grp_getgrnam_impl(PyObject *module, PyObject *name); static PyObject * -grp_getgrnam(PyObject *module, PyObject *args, PyObject *kwargs) +grp_getgrnam(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static char *_keywords[] = {"name", NULL}; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = 
PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "getgrnam", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; PyObject *name; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "U:getgrnam", _keywords, - &name)) + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("getgrnam", "argument 'name'", "str", args[0]); goto exit; + } + name = args[0]; return_value = grp_getgrnam_impl(module, name); exit: @@ -82,4 +148,4 @@ grp_getgrall(PyObject *module, PyObject *Py_UNUSED(ignored)) { return grp_getgrall_impl(module); } -/*[clinic end generated code: output=81f180beb67fc585 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2154194308dab038 input=a9049054013a1b77]*/ diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c index f7d3e12f347ec2..29da9936b65504 100644 --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -1,9 +1,8 @@ /* UNIX group file access module */ -// Need limited C API version 3.13 for PyMem_RawRealloc() -#include "pyconfig.h" // Py_GIL_DISABLED -#ifndef Py_GIL_DISABLED -#define Py_LIMITED_API 0x030d0000 +// Argument Clinic uses the internal C API +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" @@ -281,23 +280,33 @@ static PyObject * grp_getgrall_impl(PyObject *module) /*[clinic end generated code: output=585dad35e2e763d7 input=d7df76c825c367df]*/ { - PyObject *d; - struct group *p; - - if ((d = PyList_New(0)) == NULL) + PyObject *d = PyList_New(0); + if (d == NULL) { return NULL; + } + + static PyMutex 
getgrall_mutex = {0}; + PyMutex_Lock(&getgrall_mutex); setgrent(); + + struct group *p; while ((p = getgrent()) != NULL) { + // gh-126316: Don't release the mutex around mkgrent() since + // setgrent()/endgrent() are not reentrant / thread-safe. A deadlock + // is unlikely since mkgrent() should not be able to call arbitrary + // Python code. PyObject *v = mkgrent(module, p); if (v == NULL || PyList_Append(d, v) != 0) { Py_XDECREF(v); - Py_DECREF(d); - endgrent(); - return NULL; + Py_CLEAR(d); + goto done; } Py_DECREF(v); } + +done: endgrent(); + PyMutex_Unlock(&getgrall_mutex); return d; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index dba43d5911da95..148350cc4b9195 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -302,21 +302,32 @@ validate_and_copy_tuple(PyObject *tup) } static int -init_co_cached(PyCodeObject *self) { - if (self->_co_cached == NULL) { - self->_co_cached = PyMem_New(_PyCoCached, 1); - if (self->_co_cached == NULL) { +init_co_cached(PyCodeObject *self) +{ + _PyCoCached *cached = FT_ATOMIC_LOAD_PTR(self->_co_cached); + if (cached != NULL) { + return 0; + } + + Py_BEGIN_CRITICAL_SECTION(self); + cached = self->_co_cached; + if (cached == NULL) { + cached = PyMem_New(_PyCoCached, 1); + if (cached == NULL) { PyErr_NoMemory(); - return -1; } - self->_co_cached->_co_code = NULL; - self->_co_cached->_co_cellvars = NULL; - self->_co_cached->_co_freevars = NULL; - self->_co_cached->_co_varnames = NULL; + else { + cached->_co_code = NULL; + cached->_co_cellvars = NULL; + cached->_co_freevars = NULL; + cached->_co_varnames = NULL; + FT_ATOMIC_STORE_PTR(self->_co_cached, cached); + } } - return 0; - + Py_END_CRITICAL_SECTION(); + return cached != NULL ? 
0 : -1; } + /****************** * _PyCode_New() ******************/ @@ -1571,16 +1582,21 @@ get_cached_locals(PyCodeObject *co, PyObject **cached_field, { assert(cached_field != NULL); assert(co->_co_cached != NULL); - if (*cached_field != NULL) { - return Py_NewRef(*cached_field); + PyObject *varnames = FT_ATOMIC_LOAD_PTR(*cached_field); + if (varnames != NULL) { + return Py_NewRef(varnames); } - assert(*cached_field == NULL); - PyObject *varnames = get_localsplus_names(co, kind, num); + + Py_BEGIN_CRITICAL_SECTION(co); + varnames = *cached_field; if (varnames == NULL) { - return NULL; + varnames = get_localsplus_names(co, kind, num); + if (varnames != NULL) { + FT_ATOMIC_STORE_PTR(*cached_field, varnames); + } } - *cached_field = Py_NewRef(varnames); - return varnames; + Py_END_CRITICAL_SECTION(); + return Py_XNewRef(varnames); } PyObject * @@ -1674,18 +1690,26 @@ _PyCode_GetCode(PyCodeObject *co) if (init_co_cached(co)) { return NULL; } - if (co->_co_cached->_co_code != NULL) { - return Py_NewRef(co->_co_cached->_co_code); + + _PyCoCached *cached = co->_co_cached; + PyObject *code = FT_ATOMIC_LOAD_PTR(cached->_co_code); + if (code != NULL) { + return Py_NewRef(code); } - PyObject *code = PyBytes_FromStringAndSize((const char *)_PyCode_CODE(co), - _PyCode_NBYTES(co)); + + Py_BEGIN_CRITICAL_SECTION(co); + code = cached->_co_code; if (code == NULL) { - return NULL; + code = PyBytes_FromStringAndSize((const char *)_PyCode_CODE(co), + _PyCode_NBYTES(co)); + if (code != NULL) { + deopt_code(co, (_Py_CODEUNIT *)PyBytes_AS_STRING(code)); + assert(cached->_co_code == NULL); + FT_ATOMIC_STORE_PTR(cached->_co_code, code); + } } - deopt_code(co, (_Py_CODEUNIT *)PyBytes_AS_STRING(code)); - assert(co->_co_cached->_co_code == NULL); - co->_co_cached->_co_code = Py_NewRef(code); - return code; + Py_END_CRITICAL_SECTION(); + return Py_XNewRef(code); } PyObject * @@ -2871,20 +2895,22 @@ get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use) 
assert(interp->stoptheworld.world_stopped); assert(in_use->flags == NULL); int32_t max_index = 0; - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index; if (idx > max_index) { max_index = idx; } } + _Py_FOR_EACH_TSTATE_END(interp); in_use->size = (size_t) max_index + 1; in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags)); if (in_use->flags == NULL) { return -1; } - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1; } + _Py_FOR_EACH_TSTATE_END(interp); return 0; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 23616b3918b0d6..49b213eaa817e2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1285,6 +1285,20 @@ ensure_shared_on_resize(PyDictObject *mp) #endif } +static inline void +ensure_shared_on_keys_version_assignment(PyDictObject *mp) +{ + ASSERT_DICT_LOCKED((PyObject *) mp); + #ifdef Py_GIL_DISABLED + if (!IS_DICT_SHARED(mp)) { + // This ensures that a concurrent resize operation will delay + // freeing the old keys or values using QSBR, which is necessary to + // safely allow concurrent reads without locking. 
+ SET_DICT_SHARED(mp); + } + #endif +} + #ifdef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE int @@ -1644,7 +1658,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, } _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); @@ -1686,7 +1700,7 @@ insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) ix = unicodekeys_lookup_unicode(keys, key, hash); if (ix == DKIX_EMPTY && keys->dk_usable > 0) { // Insert into new slot - keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(keys, hash); ix = keys->dk_nentries; dictkeys_set_index(keys, hashpos, ix); @@ -2617,7 +2631,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, ASSERT_CONSISTENT(mp); } else { - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); if (DK_IS_UNICODE(mp->ma_keys)) { PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -4429,7 +4443,7 @@ dict_popitem_impl(PyDictObject *self) return NULL; } } - self->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(self->ma_keys->dk_version, 0); /* Pop last item */ PyObject *key, *value; @@ -7087,51 +7101,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict) } } -int -_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) +#ifdef Py_GIL_DISABLED + +// Tries to set the dictionary for an object in the easy case when our current +// dictionary is either completely not materialized or is a dictionary which +// does not point at the inline values. 
+static bool +try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict) +{ + bool replaced = false; + Py_BEGIN_CRITICAL_SECTION(obj); + + PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + // We only have inline values, we can just completely replace them. + set_dict_inline_values(obj, (PyDictObject *)new_dict); + replaced = true; + goto exit_lock; + } + + if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) { + // We have a materialized dict which doesn't point at the inline values, + // We get to simply swap dictionaries and free the old dictionary. + FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)Py_XNewRef(new_dict)); + replaced = true; + goto exit_lock; + } + else { + // We have inline values, we need to lock the dict and the object + // at the same time to safely dematerialize them. To do that while releasing + // the object lock we need a strong reference to the current dictionary. + Py_INCREF(dict); + } +exit_lock: + Py_END_CRITICAL_SECTION(); + return replaced; +} + +// Replaces a dictionary that is probably the dictionary which has been +// materialized and points at the inline values. We could have raced +// and replaced it with another dictionary though. 
+static int +replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict, + PyDictObject *cur_dict, PyObject *new_dict) +{ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); + + if (cur_dict == inline_dict) { + assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj)); + + int err = _PyDict_DetachFromObject(inline_dict, obj); + if (err != 0) { + assert(new_dict == NULL); + return err; + } + } + + FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)Py_XNewRef(new_dict)); + return 0; +} + +#endif + +static void +decref_maybe_delay(PyObject *obj, bool delay) +{ + if (delay) { + _PyObject_XDecRefDelayed(obj); + } + else { + Py_XDECREF(obj); + } +} + +static int +set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear) { assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); +#ifndef NDEBUG + Py_BEGIN_CRITICAL_SECTION(obj); assert(_PyObject_InlineValuesConsistencyCheck(obj)); + Py_END_CRITICAL_SECTION(); +#endif int err = 0; PyTypeObject *tp = Py_TYPE(obj); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { - PyDictObject *dict = _PyObject_GetManagedDict(obj); - if (dict == NULL) { #ifdef Py_GIL_DISABLED - Py_BEGIN_CRITICAL_SECTION(obj); + PyDictObject *prev_dict; + if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) { + // We had a materialized dictionary which pointed at the inline + // values. We need to lock both the object and the dict at the + // same time to safely replace it. We can't merely lock the dictionary + // while the object is locked because it could suspend the object lock. 
+ PyDictObject *cur_dict; - dict = _PyObject_ManagedDictPointer(obj)->dict; - if (dict == NULL) { - set_dict_inline_values(obj, (PyDictObject *)new_dict); - } + assert(prev_dict != NULL); + Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict); - Py_END_CRITICAL_SECTION(); + // We could have had another thread race in between the call to + // try_set_dict_inline_only_or_other_dict where we locked the object + // and when we unlocked and re-locked the dictionary. + cur_dict = _PyObject_GetManagedDict(obj); - if (dict == NULL) { - return 0; + err = replace_dict_probably_inline_materialized(obj, prev_dict, + cur_dict, new_dict); + + Py_END_CRITICAL_SECTION2(); + + // Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict + // while the object was locked + decref_maybe_delay((PyObject *)prev_dict, + !clear && prev_dict != cur_dict); + if (err != 0) { + return err; } -#else - set_dict_inline_values(obj, (PyDictObject *)new_dict); - return 0; -#endif - } - Py_BEGIN_CRITICAL_SECTION2(dict, obj); + prev_dict = cur_dict; + } - // We've locked dict, but the actual dict could have changed - // since we locked it. 
- dict = _PyObject_ManagedDictPointer(obj)->dict; - err = _PyDict_DetachFromObject(dict, obj); - assert(err == 0 || new_dict == NULL); - if (err == 0) { - FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, - (PyDictObject *)Py_XNewRef(new_dict)); + if (prev_dict != NULL) { + // decref for the dictionary that we replaced + decref_maybe_delay((PyObject *)prev_dict, !clear); } - Py_END_CRITICAL_SECTION2(); - if (err == 0) { - Py_XDECREF(dict); + return 0; +#else + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + set_dict_inline_values(obj, (PyDictObject *)new_dict); + return 0; } + if (_PyDict_DetachFromObject(dict, obj) == 0) { + _PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict); + Py_DECREF(dict); + return 0; + } + assert(new_dict == NULL); + return -1; +#endif } else { PyDictObject *dict; @@ -7144,17 +7253,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) (PyDictObject *)Py_XNewRef(new_dict)); Py_END_CRITICAL_SECTION(); - - Py_XDECREF(dict); + decref_maybe_delay((PyObject *)dict, !clear); } assert(_PyObject_InlineValuesConsistencyCheck(obj)); return err; } +int +_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) +{ + return set_or_clear_managed_dict(obj, new_dict, false); +} + void PyObject_ClearManagedDict(PyObject *obj) { - if (_PyObject_SetManagedDict(obj, NULL) < 0) { + if (set_or_clear_managed_dict(obj, NULL, true) < 0) { /* Must be out of memory */ assert(PyErr_Occurred() == PyExc_MemoryError); PyErr_WriteUnraisable(NULL); @@ -7317,20 +7431,54 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) dictkeys_decref(interp, keys, false); } -uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, - PyDictKeysObject *dictkeys) +static inline uint32_t +get_next_dict_keys_version(PyInterpreterState *interp) { - if (dictkeys->dk_version != 0) { - return dictkeys->dk_version; - } +#ifdef Py_GIL_DISABLED + uint32_t v; + do { + v = _Py_atomic_load_uint32_relaxed( + 
&interp->dict_state.next_keys_version); + if (v == 0) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint32( + &interp->dict_state.next_keys_version, &v, v + 1)); +#else if (interp->dict_state.next_keys_version == 0) { return 0; } uint32_t v = interp->dict_state.next_keys_version++; - dictkeys->dk_version = v; +#endif return v; } +// In free-threaded builds the caller must ensure that the keys object is not +// being mutated concurrently by another thread. +uint32_t +_PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, + PyDictKeysObject *dictkeys) +{ + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return dk_version; + } + dk_version = get_next_dict_keys_version(interp); + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); + return dk_version; +} + +uint32_t +_PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp, + PyDictObject *dict) +{ + ASSERT_DICT_LOCKED((PyObject *) dict); + uint32_t dk_version = + _PyDictKeys_GetVersionForCurrentState(interp, dict->ma_keys); + ensure_shared_on_keys_version_assignment(dict); + return dk_version; +} + static inline int validate_watcher_id(PyInterpreterState *interp, int watcher_id) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 1f2387f68440aa..4ba47285f7152f 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -289,12 +289,14 @@ functions is running. 
*/ +#ifndef Py_GIL_DISABLED static inline struct _func_version_cache_item * get_cache_item(PyInterpreterState *interp, uint32_t version) { return interp->func_state.func_version_cache + (version % FUNC_VERSION_CACHE_SIZE); } +#endif void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) diff --git a/Objects/genobject.c b/Objects/genobject.c index 19c2c4e3331a89..e87f199c2504ba 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -471,14 +471,14 @@ _gen_throw(PyGenObject *gen, int close_on_genexit, return gen_send_ex(gen, Py_None, 1, 0); goto throw_here; } + PyThreadState *tstate = _PyThreadState_GET(); + assert(tstate != NULL); if (PyGen_CheckExact(yf) || PyCoro_CheckExact(yf)) { /* `yf` is a generator or a coroutine. */ - PyThreadState *tstate = _PyThreadState_GET(); - /* Since we are fast-tracking things by skipping the eval loop, - we need to update the current frame so the stack trace - will be reported correctly to the user. */ - /* XXX We should probably be updating the current frame - somewhere in ceval.c. */ + + /* Link frame into the stack to enable complete backtraces. */ + /* XXX We should probably be updating the current frame somewhere in + ceval.c. 
*/ _PyInterpreterFrame *prev = tstate->current_frame; frame->previous = prev; tstate->current_frame = frame; @@ -502,10 +502,16 @@ _gen_throw(PyGenObject *gen, int close_on_genexit, Py_DECREF(yf); goto throw_here; } + + _PyInterpreterFrame *prev = tstate->current_frame; + frame->previous = prev; + tstate->current_frame = frame; PyFrameState state = gen->gi_frame_state; gen->gi_frame_state = FRAME_EXECUTING; ret = PyObject_CallFunctionObjArgs(meth, typ, val, tb, NULL); gen->gi_frame_state = state; + tstate->current_frame = prev; + frame->previous = NULL; Py_DECREF(meth); } Py_DECREF(yf); diff --git a/Objects/object.c b/Objects/object.c index b115bc756d90b3..8868fa29066404 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -119,7 +119,7 @@ get_reftotal(PyInterpreterState *interp) since we can't determine which interpreter updated it. */ Py_ssize_t total = REFTOTAL(interp); #ifdef Py_GIL_DISABLED - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { /* This may race with other threads modifications to their reftotal */ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p; total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index dfeccfa4dd7658..2cc0377f68f990 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1093,10 +1093,24 @@ struct _mem_work_chunk { }; static void -free_work_item(uintptr_t ptr) +free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) { if (ptr & 0x01) { - PyObject_Free((char *)(ptr - 1)); + PyObject *obj = (PyObject *)(ptr - 1); +#ifdef Py_GIL_DISABLED + if (cb == NULL) { + assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped); + Py_DECREF(obj); + return; + } + + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1); + if (refcount == 0) { + cb(obj, state); + } +#else + Py_DECREF(obj); +#endif } else { PyMem_Free((void *)ptr); @@ -1107,7 +1121,7 @@ static void 
free_delayed(uintptr_t ptr) { #ifndef Py_GIL_DISABLED - free_work_item(ptr); + free_work_item(ptr, NULL, NULL); #else PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetFinalizing(interp) != NULL || @@ -1115,7 +1129,8 @@ free_delayed(uintptr_t ptr) { // Free immediately during interpreter shutdown or if the world is // stopped. - free_work_item(ptr); + assert(!interp->stoptheworld.world_stopped || !(ptr & 0x01)); + free_work_item(ptr, NULL, NULL); return; } @@ -1142,7 +1157,8 @@ free_delayed(uintptr_t ptr) if (buf == NULL) { // failed to allocate a buffer, free immediately _PyEval_StopTheWorld(tstate->base.interp); - free_work_item(ptr); + // TODO: Fix me + free_work_item(ptr, NULL, NULL); _PyEval_StartTheWorld(tstate->base.interp); return; } @@ -1166,12 +1182,16 @@ _PyMem_FreeDelayed(void *ptr) free_delayed((uintptr_t)ptr); } +#ifdef Py_GIL_DISABLED void -_PyObject_FreeDelayed(void *ptr) +_PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); - free_delayed(((uintptr_t)ptr)|0x01); + if (ptr != NULL) { + free_delayed(((uintptr_t)ptr)|0x01); + } } +#endif static struct _mem_work_chunk * work_queue_first(struct llist_node *head) @@ -1181,7 +1201,7 @@ work_queue_first(struct llist_node *head) static void process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, - bool keep_empty) + bool keep_empty, delayed_dealloc_cb cb, void *state) { while (!llist_empty(head)) { struct _mem_work_chunk *buf = work_queue_first(head); @@ -1192,7 +1212,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, return; } - free_work_item(item->ptr); + free_work_item(item->ptr, cb, state); buf->rd_idx++; } @@ -1210,7 +1230,8 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr) + struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + void *state) { if 
(!_Py_atomic_load_int_relaxed(&queue->has_work)) { return; @@ -1218,7 +1239,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_queue(&queue->head, qsbr, false); + process_queue(&queue->head, qsbr, false, cb, state); int more_work = !llist_empty(&queue->head); _Py_atomic_store_int_relaxed(&queue->has_work, more_work); @@ -1234,10 +1255,23 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true); + process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); + + // Process shared interpreter work + process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); +} + +void +_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state) +{ + PyInterpreterState *interp = tstate->interp; + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + + // Process thread-local work + process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); // Process shared interpreter work - process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr); + process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); } void @@ -1279,7 +1313,7 @@ _PyMem_FiniDelayed(PyInterpreterState *interp) // Free the remaining items immediately. There should be no other // threads accessing the memory at this point during shutdown. 
struct _mem_work_item *item = &buf->array[buf->rd_idx]; - free_work_item(item->ptr); + free_work_item(item->ptr, NULL, NULL); buf->rd_idx++; } @@ -1405,7 +1439,7 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp) { size_t allocated_blocks = 0; #ifdef Py_GIL_DISABLED - for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) { + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t; for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { mi_heap_t *heap = &tstate->mimalloc.heaps[i]; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 840d004d3d98c7..2611404a3d0d61 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5645,6 +5645,24 @@ _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) END_TYPE_LOCK(); } +int +_PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version) +{ + int err; + BEGIN_TYPE_LOCK(); + err = validate(ty); + if (!err) { + if(assign_version_tag(_PyInterpreterState_GET(), ty)) { + *tp_version = ty->tp_version_tag; + } + else { + err = -1; + } + } + END_TYPE_LOCK(); + return err; +} + static void set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ffe2f5b940942..6ee886c2ba0fc8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -391,7 +391,7 @@ dummy_func( }; specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ToBool(value, next_instr); @@ -399,7 +399,7 @@ dummy_func( } OPCODE_DEFERRED_INC(TO_BOOL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_TO_BOOL, (value -- res)) { @@ -435,7 +435,7 @@ dummy_func( PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); 
EXIT_IF(!PyList_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); - res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False; + res = PyList_GET_SIZE(value_o) ? PyStackRef_True : PyStackRef_False; DECREF_INPUTS(); } @@ -1569,7 +1569,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -1578,7 +1578,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // res[1] because we need a pointer to res to pass it to _PyEval_LoadGlobalStackRef @@ -1599,16 +1599,18 @@ dummy_func( op(_GUARD_GLOBALS_VERSION, (version/1 --)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + assert(DK_IS_UNICODE(keys)); } op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } @@ -1616,33 +1618,44 @@ dummy_func( { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != 
version); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(globals_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(builtins_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } macro(LOAD_GLOBAL_MODULE) = diff --git a/Python/ceval.c b/Python/ceval.c index 892dc5f7b58ff8..2a3938572c1569 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -296,11 +296,12 @@ Py_SetRecursionLimit(int new_limit) { PyInterpreterState *interp = _PyInterpreterState_GET(); interp->ceval.recursion_limit = new_limit; - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { int depth = p->py_recursion_limit - p->py_recursion_remaining; p->py_recursion_limit = new_limit; p->py_recursion_remaining = new_limit - depth; } + 
_Py_FOR_EACH_TSTATE_END(interp); } /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 4c9f59f837e11b..1f811e72406130 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -977,25 +977,19 @@ make_pending_calls(PyThreadState *tstate) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) { - _PyRuntimeState *runtime = &_PyRuntime; - - HEAD_LOCK(runtime); - for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) { _Py_set_eval_breaker_bit(tstate, bit); } - HEAD_UNLOCK(runtime); + _Py_FOR_EACH_TSTATE_END(interp); } void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) { - _PyRuntimeState *runtime = &_PyRuntime; - - HEAD_LOCK(runtime); - for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) { _Py_unset_eval_breaker_bit(tstate, bit); } - HEAD_UNLOCK(runtime); + _Py_FOR_EACH_TSTATE_END(interp); } void diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 976a3429b2e603..5c7138a94214a8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -508,7 +508,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(TO_BOOL, hit); - res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False; + res = PyList_GET_SIZE(value_o) ? 
PyStackRef_True : PyStackRef_False; PyStackRef_CLOSE(value); stack_pointer[-1] = res; break; @@ -1870,11 +1870,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + assert(DK_IS_UNICODE(keys)); break; } @@ -1886,11 +1887,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - globals_keys = dict->ma_keys; + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); stack_pointer[0].bits = (uintptr_t)globals_keys; stack_pointer += 1; @@ -1906,11 +1908,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - builtins_keys = dict->ma_keys; + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); stack_pointer[0].bits = (uintptr_t)builtins_keys; stack_pointer += 1; @@ -1926,17 +1929,25 @@ globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if 
(!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); @@ -1952,17 +1963,25 @@ builtins_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); diff --git a/Python/fileutils.c b/Python/fileutils.c index c9ae1b3f54e167..9529b14d377c60 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2506,37 +2506,38 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) #endif #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) - if (p1[0] == L'.' 
&& IS_SEP(&p1[1])) { - // Skip leading '.\' - path = &path[2]; - while (IS_SEP(path)) { - path++; - } - p1 = p2 = minP2 = path; - lastC = SEP; - } - else { - Py_ssize_t drvsize, rootsize; - _Py_skiproot(path, size, &drvsize, &rootsize); - if (drvsize || rootsize) { - // Skip past root and update minP2 - p1 = &path[drvsize + rootsize]; + Py_ssize_t drvsize, rootsize; + _Py_skiproot(path, size, &drvsize, &rootsize); + if (drvsize || rootsize) { + // Skip past root and update minP2 + p1 = &path[drvsize + rootsize]; #ifndef ALTSEP - p2 = p1; + p2 = p1; #else - for (; p2 < p1; ++p2) { - if (*p2 == ALTSEP) { - *p2 = SEP; - } + for (; p2 < p1; ++p2) { + if (*p2 == ALTSEP) { + *p2 = SEP; } + } #endif - minP2 = p2 - 1; - lastC = *minP2; + minP2 = p2 - 1; + lastC = *minP2; #ifdef MS_WINDOWS - if (lastC != SEP) { - minP2++; - } + if (lastC != SEP) { + minP2++; + } +#endif + } + if (p1[0] == L'.' && SEP_OR_END(&p1[1])) { + // Skip leading '.\' + lastC = *++p1; +#ifdef ALTSEP + if (lastC == ALTSEP) { + lastC = SEP; + } #endif + while (IS_SEP(p1)) { + p1++; } } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index a6e0022340b7fe..f7f44407494e51 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -304,7 +304,7 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*); // visit each thread's heaps for GC objects - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; if (!_Py_atomic_load_int(&m->initialized)) { // The thread may not have called tstate_mimalloc_bind() yet. 
@@ -374,8 +374,7 @@ gc_visit_stackref(_PyStackRef stackref) static void gc_visit_thread_stacks(PyInterpreterState *interp) { - HEAD_LOCK(&_PyRuntime); - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) { PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); if (executable == NULL || !PyCode_Check(executable)) { @@ -390,7 +389,24 @@ gc_visit_thread_stacks(PyInterpreterState *interp) } } } - HEAD_UNLOCK(&_PyRuntime); + _Py_FOR_EACH_TSTATE_END(interp); +} + +static void +queue_untracked_obj_decref(PyObject *op, struct collection_state *state) +{ + if (!_PyObject_GC_IS_TRACKED(op)) { + // GC objects with zero refcount are handled subsequently by the + // GC as if they were cyclic trash, but we have to handle dead + // non-GC objects here. Add one to the refcount so that we can + // decref and deallocate the object once we start the world again. + op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + worklist_push(&state->objs_to_decref, op); + } + } static void @@ -404,22 +420,20 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state) // Subtract one when merging because the queue had a reference. Py_ssize_t refcount = merge_refcount(op, -1); - if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) { - // GC objects with zero refcount are handled subsequently by the - // GC as if they were cyclic trash, but we have to handle dead - // non-GC objects here. Add one to the refcount so that we can - // decref and deallocate the object once we start the world again. 
- op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT); -#ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyThreadState_GET()); -#endif - worklist_push(&state->objs_to_decref, op); + if (refcount == 0) { + queue_untracked_obj_decref(op, state); } } } static void -process_delayed_frees(PyInterpreterState *interp) +queue_freed_object(PyObject *obj, void *arg) +{ + queue_untracked_obj_decref(obj, arg); +} + +static void +process_delayed_frees(PyInterpreterState *interp, struct collection_state *state) { // While we are in a "stop the world" pause, we can observe the latest // write sequence by advancing the write sequence immediately. @@ -429,16 +443,15 @@ process_delayed_frees(PyInterpreterState *interp) // Merge the queues from other threads into our own queue so that we can // process all of the pending delayed free requests at once. - HEAD_LOCK(&_PyRuntime); - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { _PyThreadStateImpl *other = (_PyThreadStateImpl *)p; if (other != current_tstate) { llist_concat(¤t_tstate->mem_free_queue, &other->mem_free_queue); } } - HEAD_UNLOCK(&_PyRuntime); + _Py_FOR_EACH_TSTATE_END(interp); - _PyMem_ProcessDelayed((PyThreadState *)current_tstate); + _PyMem_ProcessDelayedNoDealloc((PyThreadState *)current_tstate, queue_freed_object, state); } // Subtract an incoming reference from the computed "gc_refs" refcount. 
@@ -1219,8 +1232,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } - HEAD_LOCK(&_PyRuntime); - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; // merge per-thread refcount for types into the type's actual refcount @@ -1229,9 +1241,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // merge refcounts for all queued objects merge_queued_objects(tstate, state); } - HEAD_UNLOCK(&_PyRuntime); + _Py_FOR_EACH_TSTATE_END(interp); - process_delayed_frees(interp); + process_delayed_frees(interp, state); // Find unreachable objects int err = deduce_unreachable_heap(interp, state); @@ -1910,13 +1922,7 @@ PyObject_GC_Del(void *op) } record_deallocation(_PyThreadState_GET()); - PyObject *self = (PyObject *)op; - if (_PyObject_GC_IS_SHARED_INLINE(self)) { - _PyObject_FreeDelayed(((char *)op)-presize); - } - else { - PyObject_Free(((char *)op)-presize); - } + PyObject_Free(((char *)op)-presize); } int @@ -1984,13 +1990,11 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - HEAD_LOCK(&_PyRuntime); - _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; - while (tstate != NULL) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; _PyObject_ClearFreeLists(&tstate->freelists, 0); - tstate = (_PyThreadStateImpl *)tstate->base.next; } - HEAD_UNLOCK(&_PyRuntime); + _Py_FOR_EACH_TSTATE_END(interp); } #endif // Py_GIL_DISABLED diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f3db2f9abc79d0..13947849942cd4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6098,7 +6098,7 @@ { uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if 
ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -6109,7 +6109,7 @@ } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 1 cache entry */ /* Skip 1 cache entry */ @@ -6144,28 +6144,35 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + assert(DK_IS_UNICODE(keys)); } // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; 
if (oparg & 1) stack_pointer[1] = null; @@ -6188,8 +6195,9 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ @@ -6197,12 +6205,17 @@ { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -7745,7 +7758,7 @@ value = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7755,7 +7768,7 @@ } OPCODE_DEFERRED_INC(TO_BOOL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _TO_BOOL @@ -7850,7 +7863,7 @@ PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); DEOPT_IF(!PyList_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); - res = Py_SIZE(value_o) ? PyStackRef_True : PyStackRef_False; + res = PyList_GET_SIZE(value_o) ? 
PyStackRef_True : PyStackRef_False; PyStackRef_CLOSE(value); stack_pointer[-1] = res; DISPATCH(); diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 87c2addaf809eb..3503809e3306cb 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1006,13 +1006,10 @@ set_global_version(PyThreadState *tstate, uint32_t version) #ifdef Py_GIL_DISABLED // Set the version on all threads in free-threaded builds. - _PyRuntimeState *runtime = &_PyRuntime; - HEAD_LOCK(runtime); - for (tstate = interp->threads.head; tstate; - tstate = PyThreadState_Next(tstate)) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) { set_version_raw(&tstate->eval_breaker, version); }; - HEAD_UNLOCK(runtime); + _Py_FOR_EACH_TSTATE_END(interp); #else // Normal builds take the current version from instrumentation_version when // attaching a thread, so we only have to set the current thread's version. diff --git a/Python/optimizer.c b/Python/optimizer.c index bc2ecc098b0e15..6a232218981dcd 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -205,8 +205,8 @@ _PyOptimizer_Optimize( return 1; } -_PyExecutorObject * -_Py_GetExecutor(PyCodeObject *code, int offset) +static _PyExecutorObject * +get_executor_lock_held(PyCodeObject *code, int offset) { int code_len = (int)Py_SIZE(code); for (int i = 0 ; i < code_len;) { @@ -222,6 +222,16 @@ _Py_GetExecutor(PyCodeObject *code, int offset) return NULL; } +_PyExecutorObject * +_Py_GetExecutor(PyCodeObject *code, int offset) +{ + _PyExecutorObject *executor; + Py_BEGIN_CRITICAL_SECTION(code); + executor = get_executor_lock_held(code, offset); + Py_END_CRITICAL_SECTION(); + return executor; +} + static PyObject * is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) { diff --git a/Python/pystate.c b/Python/pystate.c index 44f55be5b5b7a8..975eb6d4fbd0f2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -790,18 +790,15 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } // Clear the current/main thread 
state last. - HEAD_LOCK(runtime); - PyThreadState *p = interp->threads.head; - HEAD_UNLOCK(runtime); - while (p != NULL) { + _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { // See https://github.com/python/cpython/issues/102126 // Must be called without HEAD_LOCK held as it can deadlock // if any finalizer tries to acquire that lock. + HEAD_UNLOCK(runtime); PyThreadState_Clear(p); HEAD_LOCK(runtime); - p = p->next; - HEAD_UNLOCK(runtime); } + _Py_FOR_EACH_TSTATE_END(interp); if (tstate->interp == interp) { /* We fix tstate->_status below when we for sure aren't using it (e.g. no longer need the GIL). */ @@ -1801,10 +1798,9 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) static void zapthreads(PyInterpreterState *interp) { - PyThreadState *tstate; /* No need to lock the mutex here because this should only happen when the threads are all really dead (XXX famous last words). */ - while ((tstate = interp->threads.head) != NULL) { + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { tstate_verify_not_active(tstate); tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); @@ -2161,7 +2157,7 @@ decrement_stoptheworld_countdown(struct _stoptheworld_state *stw) } #ifdef Py_GIL_DISABLED -// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events, +// Interpreter for _Py_FOR_EACH_STW_INTERP(). For global stop-the-world events, // we start with the first interpreter and then iterate over all interpreters. // For per-interpreter stop-the-world events, we only operate on the one // interpreter. @@ -2176,10 +2172,9 @@ interp_for_stop_the_world(struct _stoptheworld_state *stw) // Loops over threads for a stop-the-world event. // For global: all threads in all interpreters // For per-interpreter: all threads in the interpreter -#define _Py_FOR_EACH_THREAD(stw, i, t) \ - for (i = interp_for_stop_the_world((stw)); \ - i != NULL; i = ((stw->is_global) ? 
i->next : NULL)) \ - for (t = i->threads.head; t; t = t->next) +#define _Py_FOR_EACH_STW_INTERP(stw, i) \ + for (PyInterpreterState *i = interp_for_stop_the_world((stw)); \ + i != NULL; i = ((stw->is_global) ? i->next : NULL)) // Try to transition threads atomically from the "detached" state to the @@ -2188,19 +2183,19 @@ static bool park_detached_threads(struct _stoptheworld_state *stw) { int num_parked = 0; - PyInterpreterState *i; - PyThreadState *t; - _Py_FOR_EACH_THREAD(stw, i, t) { - int state = _Py_atomic_load_int_relaxed(&t->state); - if (state == _Py_THREAD_DETACHED) { - // Atomically transition to "suspended" if in "detached" state. - if (_Py_atomic_compare_exchange_int(&t->state, - &state, _Py_THREAD_SUSPENDED)) { - num_parked++; + _Py_FOR_EACH_STW_INTERP(stw, i) { + _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { + int state = _Py_atomic_load_int_relaxed(&t->state); + if (state == _Py_THREAD_DETACHED) { + // Atomically transition to "suspended" if in "detached" state. + if (_Py_atomic_compare_exchange_int( + &t->state, &state, _Py_THREAD_SUSPENDED)) { + num_parked++; + } + } + else if (state == _Py_THREAD_ATTACHED && t != stw->requester) { + _Py_set_eval_breaker_bit(t, _PY_EVAL_PLEASE_STOP_BIT); } - } - else if (state == _Py_THREAD_ATTACHED && t != stw->requester) { - _Py_set_eval_breaker_bit(t, _PY_EVAL_PLEASE_STOP_BIT); } } stw->thread_countdown -= num_parked; @@ -2227,12 +2222,12 @@ stop_the_world(struct _stoptheworld_state *stw) stw->stop_event = (PyEvent){0}; // zero-initialize (unset) stw->requester = _PyThreadState_GET(); // may be NULL - PyInterpreterState *i; - PyThreadState *t; - _Py_FOR_EACH_THREAD(stw, i, t) { - if (t != stw->requester) { - // Count all the other threads (we don't wait on ourself). - stw->thread_countdown++; + _Py_FOR_EACH_STW_INTERP(stw, i) { + _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { + if (t != stw->requester) { + // Count all the other threads (we don't wait on ourself). 
+ stw->thread_countdown++; + } } } @@ -2273,14 +2268,14 @@ start_the_world(struct _stoptheworld_state *stw) stw->requested = 0; stw->world_stopped = 0; // Switch threads back to the detached state. - PyInterpreterState *i; - PyThreadState *t; - _Py_FOR_EACH_THREAD(stw, i, t) { - if (t != stw->requester) { - assert(_Py_atomic_load_int_relaxed(&t->state) == - _Py_THREAD_SUSPENDED); - _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED); - _PyParkingLot_UnparkAll(&t->state); + _Py_FOR_EACH_STW_INTERP(stw, i) { + _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { + if (t != stw->requester) { + assert(_Py_atomic_load_int_relaxed(&t->state) == + _Py_THREAD_SUSPENDED); + _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED); + _PyParkingLot_UnparkAll(&t->state); + } } } stw->requester = NULL; @@ -2344,7 +2339,6 @@ _PyEval_StartTheWorld(PyInterpreterState *interp) int PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) { - _PyRuntimeState *runtime = &_PyRuntime; PyInterpreterState *interp = _PyInterpreterState_GET(); /* Although the GIL is held, a few C API functions can be called @@ -2353,12 +2347,16 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) * list of thread states we're traversing, so to prevent that we lock * head_mutex for the duration. 
*/ - HEAD_LOCK(runtime); - for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { - if (tstate->thread_id != id) { - continue; + PyThreadState *tstate = NULL; + _Py_FOR_EACH_TSTATE_BEGIN(interp, t) { + if (t->thread_id == id) { + tstate = t; + break; } + } + _Py_FOR_EACH_TSTATE_END(interp); + if (tstate != NULL) { /* Tricky: we need to decref the current value * (if any) in tstate->async_exc, but that can in turn * allow arbitrary Python code to run, including @@ -2368,14 +2366,12 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) */ Py_XINCREF(exc); PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc); - HEAD_UNLOCK(runtime); Py_XDECREF(old_exc); _Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); - return 1; } - HEAD_UNLOCK(runtime); - return 0; + + return tstate != NULL; } //--------------------------------- @@ -2515,8 +2511,7 @@ _PyThread_CurrentFrames(void) HEAD_LOCK(runtime); PyInterpreterState *i; for (i = runtime->interpreters.head; i != NULL; i = i->next) { - PyThreadState *t; - for (t = i->threads.head; t != NULL; t = t->next) { + _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { _PyInterpreterFrame *frame = t->current_frame; frame = _PyFrame_GetFirstComplete(frame); if (frame == NULL) { @@ -2581,8 +2576,7 @@ _PyThread_CurrentExceptions(void) HEAD_LOCK(runtime); PyInterpreterState *i; for (i = runtime->interpreters.head; i != NULL; i = i->next) { - PyThreadState *t; - for (t = i->threads.head; t != NULL; t = t->next) { + _Py_FOR_EACH_TSTATE_UNLOCKED(i, t) { _PyErr_StackItem *err_info = _PyErr_GetTopmostException(t); if (err_info == NULL) { continue; diff --git a/Python/specialize.c b/Python/specialize.c index ad41dfc39c0147..c69f61c8b449a1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1519,12 +1519,12 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } -void -_Py_Specialize_LoadGlobal( +static void +specialize_load_global_lock_held( PyObject *globals, 
PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_GLOBAL] == INLINE_CACHE_ENTRIES_LOAD_GLOBAL); /* Use inline cache */ _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1); @@ -1549,8 +1549,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1561,8 +1561,8 @@ _Py_Specialize_LoadGlobal( } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)keys_version; - instr->op.code = LOAD_GLOBAL_MODULE; - goto success; + specialize(instr, LOAD_GLOBAL_MODULE); + return; } if (!PyDict_CheckExact(builtins)) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); @@ -1582,8 +1582,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t globals_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (globals_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1592,8 +1592,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState( - interp, builtin_keys); + uint32_t builtins_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) builtins); if (builtins_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1605,18 +1605,20 @@ _Py_Specialize_LoadGlobal( cache->index = (uint16_t)index; cache->module_keys_version = 
(uint16_t)globals_version; cache->builtin_keys_version = (uint16_t)builtins_version; - instr->op.code = LOAD_GLOBAL_BUILTIN; - goto success; -fail: - STAT_INC(LOAD_GLOBAL, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_GLOBAL; - cache->counter = adaptive_counter_backoff(cache->counter); + specialize(instr, LOAD_GLOBAL_BUILTIN); return; -success: - STAT_INC(LOAD_GLOBAL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); +fail: + unspecialize(instr); +} + +void +_Py_Specialize_LoadGlobal( + PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name) +{ + Py_BEGIN_CRITICAL_SECTION2(globals, builtins); + specialize_load_global_lock_held(globals, builtins, instr, name); + Py_END_CRITICAL_SECTION2(); } #ifdef Py_STATS @@ -2665,101 +2667,106 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr) cache->counter = adaptive_counter_cooldown(); } +#ifdef Py_STATS +static int +to_bool_fail_kind(PyObject *value) +{ + if (PyByteArray_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_BYTEARRAY; + } + if (PyBytes_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_BYTES; + } + if (PyDict_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_DICT; + } + if (PyFloat_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_FLOAT; + } + if (PyMemoryView_Check(value)) { + return SPEC_FAIL_TO_BOOL_MEMORY_VIEW; + } + if (PyAnySet_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_SET; + } + if (PyTuple_CheckExact(value)) { + return SPEC_FAIL_TO_BOOL_TUPLE; + } + return SPEC_FAIL_OTHER; +} +#endif // Py_STATS + +static int +check_type_always_true(PyTypeObject *ty) +{ + PyNumberMethods *nb = ty->tp_as_number; + if (nb && nb->nb_bool) { + return SPEC_FAIL_TO_BOOL_NUMBER; + } + PyMappingMethods *mp = ty->tp_as_mapping; + if (mp && mp->mp_length) { + return SPEC_FAIL_TO_BOOL_MAPPING; + } + PySequenceMethods *sq = ty->tp_as_sequence; + if (sq && sq->sq_length) { + return SPEC_FAIL_TO_BOOL_SEQUENCE; + } + return 0; +} + void 
_Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[TO_BOOL] == INLINE_CACHE_ENTRIES_TO_BOOL); _PyToBoolCache *cache = (_PyToBoolCache *)(instr + 1); PyObject *value = PyStackRef_AsPyObjectBorrow(value_o); + uint8_t specialized_op; if (PyBool_Check(value)) { - instr->op.code = TO_BOOL_BOOL; + specialized_op = TO_BOOL_BOOL; goto success; } if (PyLong_CheckExact(value)) { - instr->op.code = TO_BOOL_INT; + specialized_op = TO_BOOL_INT; goto success; } if (PyList_CheckExact(value)) { - instr->op.code = TO_BOOL_LIST; + specialized_op = TO_BOOL_LIST; goto success; } if (Py_IsNone(value)) { - instr->op.code = TO_BOOL_NONE; + specialized_op = TO_BOOL_NONE; goto success; } if (PyUnicode_CheckExact(value)) { - instr->op.code = TO_BOOL_STR; + specialized_op = TO_BOOL_STR; goto success; } if (PyType_HasFeature(Py_TYPE(value), Py_TPFLAGS_HEAPTYPE)) { - PyNumberMethods *nb = Py_TYPE(value)->tp_as_number; - if (nb && nb->nb_bool) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_NUMBER); - goto failure; - } - PyMappingMethods *mp = Py_TYPE(value)->tp_as_mapping; - if (mp && mp->mp_length) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MAPPING); - goto failure; - } - PySequenceMethods *sq = Py_TYPE(value)->tp_as_sequence; - if (sq && sq->sq_length) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SEQUENCE); - goto failure; - } - if (!PyUnstable_Type_AssignVersionTag(Py_TYPE(value))) { + unsigned int version = 0; + int err = _PyType_Validate(Py_TYPE(value), check_type_always_true, &version); + if (err < 0) { SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OUT_OF_VERSIONS); goto failure; } - uint32_t version = type_get_version(Py_TYPE(value), TO_BOOL); - if (version == 0) { + else if (err > 0) { + SPECIALIZATION_FAIL(TO_BOOL, err); goto failure; } - instr->op.code = TO_BOOL_ALWAYS_TRUE; - write_u32(cache->version, version); + + assert(err == 0); assert(version); + 
write_u32(cache->version, version); + specialized_op = TO_BOOL_ALWAYS_TRUE; goto success; } -#ifdef Py_STATS - if (PyByteArray_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTEARRAY); - goto failure; - } - if (PyBytes_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_BYTES); - goto failure; - } - if (PyDict_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_DICT); - goto failure; - } - if (PyFloat_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_FLOAT); - goto failure; - } - if (PyMemoryView_Check(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_MEMORY_VIEW); - goto failure; - } - if (PyAnySet_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_SET); - goto failure; - } - if (PyTuple_CheckExact(value)) { - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_TO_BOOL_TUPLE); - goto failure; - } - SPECIALIZATION_FAIL(TO_BOOL, SPEC_FAIL_OTHER); -#endif // Py_STATS + + SPECIALIZATION_FAIL(TO_BOOL, to_bool_fail_kind(value)); failure: - STAT_INC(TO_BOOL, failure); - instr->op.code = TO_BOOL; - cache->counter = adaptive_counter_backoff(cache->counter); + unspecialize(instr); return; success: - STAT_INC(TO_BOOL, success); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, specialized_op); } #ifdef Py_STATS diff --git a/Tools/c-analyzer/cpython/_analyzer.py b/Tools/c-analyzer/cpython/_analyzer.py index f07fa8af495e17..6204353e9bd26a 100644 --- a/Tools/c-analyzer/cpython/_analyzer.py +++ b/Tools/c-analyzer/cpython/_analyzer.py @@ -280,12 +280,26 @@ def _is_kwlist(decl): vartype = ''.join(str(decl.vartype).split()) return vartype == 'char*[]' +def _is_local_static_mutex(decl): + if not hasattr(decl, "vartype"): + return False + + if not hasattr(decl, "parent") or decl.parent is None: + # We only want to allow local variables + return False + + vartype = decl.vartype + return (vartype.typespec == 'PyMutex') and (decl.storage == 'static') def 
_has_other_supported_type(decl): if hasattr(decl, 'file') and decl.file.filename.endswith('.c.h'): assert 'clinic' in decl.file.filename, (decl,) if decl.name == '_kwtuple': return True + if _is_local_static_mutex(decl): + # GH-127081: Local static mutexes are used to + # wrap libc functions that aren't thread safe + return True vartype = str(decl.vartype).split() if vartype[0] == 'struct': vartype = vartype[1:] diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 96b32445fb62e2..e02e07ec748231 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -623,6 +623,9 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_NewRef", "_Py_SINGLETON", "_Py_STR", + "_Py_TryIncrefCompare", + "_Py_TryIncrefCompareStackRef", + "_Py_atomic_load_ptr_acquire", "_Py_atomic_load_uintptr_relaxed", "_Py_set_eval_breaker_bit", "advance_backoff_counter",