From a261868216a982de9dbe50f08071b31804105b76 Mon Sep 17 00:00:00 2001 From: Timo Ludwig Date: Fri, 24 Feb 2023 00:05:22 +0100 Subject: [PATCH] Migrate from `threading.Lock()` to `django-db-mutex` Avoid race conditions between processes --- .github/workflows/test.yml | 1 + CHANGELOG | 3 +++ README.rst | 2 +- linkcheck/__init__.py | 4 ---- linkcheck/apps.py | 6 ++++++ linkcheck/listeners.py | 11 +++++++++-- linkcheck/tests/test_linkcheck.py | 23 ++++++++++++++++++++++ linkcheck/views.py | 32 ++++++++++++++++--------------- runtests.py | 2 +- setup.py | 3 ++- 10 files changed, 63 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 27b3de3..ff33a5f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,6 +32,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade django~=${{ matrix.django-version }} + python -m pip install --upgrade django-db-mutex python -m pip install --upgrade requests - name: Run tests diff --git a/CHANGELOG b/CHANGELOG index c4da0b1..d23cece 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ Unreleased +* Breaking change: Migrated from `threading.Lock()` to `django-db-mutex` + to avoid race conditions between multiple processes. + Please add `'db_mutex'` to your `INSTALLED_APPS` setting. * Enable re-checking after rate limit was hit (Timo Ludwig, #153) * Ignore raw `post_save` signal (Timo Ludwig, #106) * Retry with fallback user agent on forbidden response (Timo Ludwig, #159) diff --git a/README.rst b/README.rst index 82481df..3d61104 100644 --- a/README.rst +++ b/README.rst @@ -35,7 +35,7 @@ Basic usage #. Install app to somewhere on your Python path (e.g. ``pip install django-linkcheck``). -#. Add ``'linkcheck'`` to your ``settings.INSTALLED_APPS``. +#. Add ``'linkcheck'`` and ``'db_mutex'`` to your ``settings.INSTALLED_APPS``. #. Add a file named ``linklists.py`` to every app (see an example in ``examples/linklists.py``) that either: diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index ba0949e..4e946c9 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -1,9 +1,5 @@ -import threading from html.parser import HTMLParser -# A global lock, showing whether linkcheck is busy -update_lock = threading.Lock() - class Lister(HTMLParser): diff --git a/linkcheck/apps.py b/linkcheck/apps.py index cf29dc9..6fafb20 100644 --- a/linkcheck/apps.py +++ b/linkcheck/apps.py @@ -1,6 +1,7 @@ import importlib from django.apps import AppConfig, apps +from django.core.exceptions import ImproperlyConfigured from django.db.models.signals import post_delete @@ -17,6 +18,11 @@ class BaseLinkcheckConfig(AppConfig): all_linklists = {} def ready(self): + if not apps.is_installed('db_mutex'): + raise ImproperlyConfigured( + 'This library depends on django-db-mutex, ' + 'please add "db_mutex" to your INSTALLED_APPS setting.' + ) self.build_linklists() def build_linklists(self): diff --git a/linkcheck/listeners.py b/linkcheck/listeners.py index db17d67..9826429 100644 --- a/linkcheck/listeners.py +++ b/linkcheck/listeners.py @@ -5,12 +5,14 @@ from queue import Empty, LifoQueue from threading import Thread +from db_mutex import DBMutexError +from db_mutex.db_mutex import db_mutex from django.apps import apps from django.db.models import signals as model_signals from linkcheck.models import Link, Url -from . import filebrowser, update_lock +from . import filebrowser from .linkcheck_settings import MAX_URL_LENGTH logger = logging.getLogger(__name__) @@ -31,6 +33,11 @@ def linkcheck_worker(block=True): # An error in any task should not stop the worker from continuing with the queue try: task['target'](*task['args'], **task['kwargs']) + except DBMutexError: + # Wait and reschedule the task + logger.debug("Lock is busy, waiting and rescheduling the task...") + time.sleep(0.1) + tasks_queue.put(task) except Exception as e: logger.exception( "%s while running %s with args=%r and kwargs=%r: %s", @@ -74,7 +81,7 @@ def do_check_instance_links(sender, instance, wait=False): if wait: time.sleep(0.1) - with update_lock: + with db_mutex('linkcheck'): content_type = linklist_cls.content_type() new_links = [] old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) diff --git a/linkcheck/tests/test_linkcheck.py b/linkcheck/tests/test_linkcheck.py index 05676e0..85df451 100644 --- a/linkcheck/tests/test_linkcheck.py +++ b/linkcheck/tests/test_linkcheck.py @@ -3,6 +3,7 @@ from io import StringIO from unittest.mock import patch +from db_mutex import DBMutexError from django.apps import apps from django.conf import settings from django.contrib.auth.models import User @@ -902,6 +903,28 @@ def passing(): 'args=() and kwargs={}: Failing task' ) + def test_reschedule_busy_lock(self): + global attempt + attempt = 0 + + def busy(): + global attempt + if attempt < 5: + attempt += 1 + raise DBMutexError("Could not acquire lock") + + tasks_queue.put({ + 'target': busy, + 'args': (), + 'kwargs': {}, + }) + with self.assertLogs(level='DEBUG') as cm: + linkcheck_worker(block=False) + self.assertEqual( + cm.output, + ['DEBUG:linkcheck.listeners:Lock is busy, waiting and rescheduling the task...'] * 5 + ) + class ViewTestCase(TestCase): def setUp(self): diff --git a/linkcheck/views.py b/linkcheck/views.py index 41de93e..9ab2add 100644 --- a/linkcheck/views.py +++ b/linkcheck/views.py @@ -1,6 +1,8 @@ from itertools import groupby from operator import itemgetter +from db_mutex import DBMutexError +from db_mutex.db_mutex import db_mutex from django import forms from django.contrib.admin.views.decorators import staff_member_required from django.contrib.contenttypes.models import ContentType @@ -12,7 +14,6 @@ from django.urls import NoReverseMatch, reverse from django.utils.translation import gettext as _ -from linkcheck import update_lock from linkcheck.linkcheck_settings import RESULTS_PER_PAGE from linkcheck.models import Link from linkcheck.utils import get_coverage_data @@ -163,21 +164,22 @@ def get_jquery_min_js(): def get_status_message(): - if update_lock.locked(): - return "Still checking. Please refresh this page in a short while. " - else: - broken_links = Link.objects.filter(ignore=False, url__status=False).count() - if broken_links: - return ( - "We've found {} broken link{}.
" - "View/fix broken links".format( - broken_links, - "s" if broken_links > 1 else "", - reverse('linkcheck_report'), + try: + with db_mutex('linkcheck'): + broken_links = Link.objects.filter(ignore=False, url__status=False).count() + if broken_links: + return ( + "We've found {} broken link{}.
" + "View/fix broken links".format( + broken_links, + "s" if broken_links > 1 else "", + reverse('linkcheck_report'), + ) ) - ) - else: - return '' + else: + return '' + except DBMutexError: + return 'Still checking. Please refresh this page in a short while.' def is_ajax(request): diff --git a/runtests.py b/runtests.py index c9e50d7..4f04623 100644 --- a/runtests.py +++ b/runtests.py @@ -13,7 +13,7 @@ 'INSTALLED_APPS': [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.sessions', 'django.contrib.contenttypes', - 'django.contrib.messages', + 'django.contrib.messages', 'db_mutex', 'linkcheck', 'linkcheck.tests.sampleapp', ], 'ROOT_URLCONF': "linkcheck.tests.urls", diff --git a/setup.py b/setup.py index 0e85545..9fdf5e3 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() + setup( name='django-linkcheck', version='2.1.0', @@ -18,7 +19,7 @@ def read(fname): url='https://github.com/DjangoAdminHackers/django-linkcheck', packages=find_packages(), include_package_data=True, - install_requires=['django', 'requests'], + install_requires=['django', 'django-db-mutex', 'requests'], extras_require={ "dev": ["flake8", "isort", "pre-commit"], },