From b67f35839501ae610e859bb3b5d14364d415108f Mon Sep 17 00:00:00 2001 From: David Venhoff Date: Thu, 31 Oct 2024 13:30:21 +0100 Subject: [PATCH] Improve the performance of `get_region_links` (#3120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve region link loading performance Time for running the `get_urls` query (without the prefetch): - Augsburg: 1829ms → 190ms (90% reduction) - München: 2037ms → 424ms (80% reduction) I achieved another 30-40% reduction by adding an index to `Link`, though this will have to be a pr for django-linkcheck * Only prefetch region links if required Prefetching the region links normally takes more than the actual query, so not doing it if possible saves a lot of time --- integreat_cms/cms/utils/linkcheck_utils.py | 34 ++++++++++++------- .../cms/views/dashboard/dashboard_view.py | 4 ++- .../views/linkcheck/linkcheck_list_view.py | 8 +++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/integreat_cms/cms/utils/linkcheck_utils.py b/integreat_cms/cms/utils/linkcheck_utils.py index 17255e4fe8..9c12b98714 100644 --- a/integreat_cms/cms/utils/linkcheck_utils.py +++ b/integreat_cms/cms/utils/linkcheck_utils.py @@ -32,12 +32,14 @@ def get_urls( region_slug: str | None = None, url_ids: Any | None = None, + prefetch_region_links: bool = False, ) -> list[Url] | QuerySet[Url]: """ Collect all the urls which appear in the latest versions of the contents of the region, filtered by ID or region if given. :param region_slug: The slug of the current region :param url_ids: The list of requested url ids + :param prefetch_region_links: Whether to prefetch region links :return: The list (or queryset) of urls """ urls = Url.objects.all() @@ -49,17 +51,15 @@ def get_urls( region_links = get_region_links(region) # Prefetch all link objects of the requested region - urls = ( - urls.filter(links__in=region_links) - .distinct() - .prefetch_related( + urls = urls.filter(links__in=region_links).distinct() + if prefetch_region_links: + urls = urls.prefetch_related( Prefetch( "links", queryset=region_links, to_attr="region_links", ) ) - ) # Annotate with number of links that are not ignored. # If there is any link that is not ignored, the url is also not ignored. @@ -107,14 +107,18 @@ def get_region_links(region: Region) -> QuerySet: organizations = Organization.objects.filter(region=region, archived=False) # Get all link objects of the requested region region_links = Link.objects.filter( - Q(page_translation__id__in=latest_pagetranslation_versions) - | Q(imprint_translation__id__in=latest_imprinttranslation_versions) - | Q(event_translation__id__in=latest_eventtranslation_versions) - | Q(poi_translation__id__in=latest_poitranslation_versions) - | Q(organization__id__in=organizations) - ).order_by("id") + page_translation__id__in=latest_pagetranslation_versions + ).union( + Link.objects.filter( + imprint_translation__id__in=latest_imprinttranslation_versions + ), + Link.objects.filter(event_translation__id__in=latest_eventtranslation_versions), + Link.objects.filter(poi_translation__id__in=latest_poitranslation_versions), + Link.objects.filter(organization__id__in=organizations), + all=True, + ) - return region_links + return Link.objects.filter(id__in=region_links.values("pk")).order_by("id") def get_url_count(region_slug: str | None = None) -> dict[str, int]: @@ -132,6 +136,7 @@ def get_url_count(region_slug: str | None = None) -> dict[str, int]: def filter_urls( region_slug: str | None = None, url_filter: str | None = None, + prefetch_region_links: bool = False, ) -> tuple[list[Url], dict[str, int]]: """ Filter all urls of one region by the given category @@ -139,9 +144,12 @@ def filter_urls( :param region_slug: The slug of the current region :param url_filter: Which urls should be returned (one of ``valid``, ``invalid``, ``ignored``, ``unchecked``). If parameter is not in these choices or omitted, all urls are returned by default. + :param prefetch_region_links: Whether to prefetch region links :return: A tuple of the requested urls and a dict containing the counters of all remaining urls """ - urls = get_urls(region_slug=region_slug) + urls = get_urls( + region_slug=region_slug, prefetch_region_links=prefetch_region_links + ) # Split url lists into their respective categories ignored_urls, valid_urls, invalid_urls, email_links, phone_links, unchecked_urls = ( [] for _ in range(6) diff --git a/integreat_cms/cms/views/dashboard/dashboard_view.py b/integreat_cms/cms/views/dashboard/dashboard_view.py index d13588c117..fa8c361830 100644 --- a/integreat_cms/cms/views/dashboard/dashboard_view.py +++ b/integreat_cms/cms/views/dashboard/dashboard_view.py @@ -147,7 +147,9 @@ def get_broken_links_context( :return: Dictionary containing the context for broken links """ - invalid_urls = filter_urls(request.region.slug, "invalid")[0] + invalid_urls = filter_urls( + request.region.slug, "invalid", prefetch_region_links=True + )[0] invalid_url = invalid_urls[0] if invalid_urls else None relevant_translation = ( diff --git a/integreat_cms/cms/views/linkcheck/linkcheck_list_view.py b/integreat_cms/cms/views/linkcheck/linkcheck_list_view.py index f449b12b17..9c6fc0a1db 100644 --- a/integreat_cms/cms/views/linkcheck/linkcheck_list_view.py +++ b/integreat_cms/cms/views/linkcheck/linkcheck_list_view.py @@ -90,7 +90,9 @@ def get_queryset(self) -> list[Url]: :return: The QuerySet of the filtered urls """ urls, count_dict = filter_urls( - self.kwargs.get("region_slug"), self.kwargs.get("url_filter") + self.kwargs.get("region_slug"), + self.kwargs.get("url_filter"), + prefetch_region_links=True, ) self.extra_context.update(count_dict) return urls @@ -102,7 +104,9 @@ def dispatch(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpRespo if edit_url_id := kwargs.pop("url_id", None): region = request.region.slug if request.region else None try: - self.instance = get_urls(region, url_ids=[edit_url_id])[0] + self.instance = get_urls( + region, url_ids=[edit_url_id], prefetch_region_links=True + )[0] except IndexError as e: raise Http404("This URL does not exist") from e if request.POST: