Skip to content

Commit

Permalink
Improve the performance of get_region_links (#3120)
Browse files Browse the repository at this point in the history
* Improve region link loading performance

Time for running the `get_urls` query (without the prefetch):
- Augsburg: 1829ms → 190ms (90% reduction)
- München: 2037ms → 424ms (80% reduction)

I achieved another 30-40% reduction by adding an index to `Link`,
though this will have to be a pr for django-linkcheck

* Only prefetch region links if required

Prefetching the region links normally takes more than the actual
query, so not doing it if possible saves a lot of time
  • Loading branch information
david-venhoff authored Oct 31, 2024
1 parent c582537 commit b67f358
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 16 deletions.
34 changes: 21 additions & 13 deletions integreat_cms/cms/utils/linkcheck_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@
def get_urls(
region_slug: str | None = None,
url_ids: Any | None = None,
prefetch_region_links: bool = False,
) -> list[Url] | QuerySet[Url]:
"""
Collect all the urls which appear in the latest versions of the contents of the region, filtered by ID or region if given.
:param region_slug: The slug of the current region
:param url_ids: The list of requested url ids
:param prefetch_region_links: Whether to prefetch region links
:return: The list (or queryset) of urls
"""
urls = Url.objects.all()
Expand All @@ -49,17 +51,15 @@ def get_urls(
region_links = get_region_links(region)

# Prefetch all link objects of the requested region
urls = (
urls.filter(links__in=region_links)
.distinct()
.prefetch_related(
urls = urls.filter(links__in=region_links).distinct()
if prefetch_region_links:
urls = urls.prefetch_related(
Prefetch(
"links",
queryset=region_links,
to_attr="region_links",
)
)
)

# Annotate with number of links that are not ignored.
# If there is any link that is not ignored, the url is also not ignored.
Expand Down Expand Up @@ -107,14 +107,18 @@ def get_region_links(region: Region) -> QuerySet:
organizations = Organization.objects.filter(region=region, archived=False)
# Get all link objects of the requested region
region_links = Link.objects.filter(
Q(page_translation__id__in=latest_pagetranslation_versions)
| Q(imprint_translation__id__in=latest_imprinttranslation_versions)
| Q(event_translation__id__in=latest_eventtranslation_versions)
| Q(poi_translation__id__in=latest_poitranslation_versions)
| Q(organization__id__in=organizations)
).order_by("id")
page_translation__id__in=latest_pagetranslation_versions
).union(
Link.objects.filter(
imprint_translation__id__in=latest_imprinttranslation_versions
),
Link.objects.filter(event_translation__id__in=latest_eventtranslation_versions),
Link.objects.filter(poi_translation__id__in=latest_poitranslation_versions),
Link.objects.filter(organization__id__in=organizations),
all=True,
)

return region_links
return Link.objects.filter(id__in=region_links.values("pk")).order_by("id")


def get_url_count(region_slug: str | None = None) -> dict[str, int]:
Expand All @@ -132,16 +136,20 @@ def get_url_count(region_slug: str | None = None) -> dict[str, int]:
def filter_urls(
region_slug: str | None = None,
url_filter: str | None = None,
prefetch_region_links: bool = False,
) -> tuple[list[Url], dict[str, int]]:
"""
Filter all urls of one region by the given category
:param region_slug: The slug of the current region
:param url_filter: Which urls should be returned (one of ``valid``, ``invalid``, ``ignored``, ``unchecked``).
If parameter is not in these choices or omitted, all urls are returned by default.
:param prefetch_region_links: Whether to prefetch region links
:return: A tuple of the requested urls and a dict containing the counters of all remaining urls
"""
urls = get_urls(region_slug=region_slug)
urls = get_urls(
region_slug=region_slug, prefetch_region_links=prefetch_region_links
)
# Split url lists into their respective categories
ignored_urls, valid_urls, invalid_urls, email_links, phone_links, unchecked_urls = (
[] for _ in range(6)
Expand Down
4 changes: 3 additions & 1 deletion integreat_cms/cms/views/dashboard/dashboard_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ def get_broken_links_context(
:return: Dictionary containing the context for broken links
"""
invalid_urls = filter_urls(request.region.slug, "invalid")[0]
invalid_urls = filter_urls(
request.region.slug, "invalid", prefetch_region_links=True
)[0]
invalid_url = invalid_urls[0] if invalid_urls else None

relevant_translation = (
Expand Down
8 changes: 6 additions & 2 deletions integreat_cms/cms/views/linkcheck/linkcheck_list_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ def get_queryset(self) -> list[Url]:
:return: The QuerySet of the filtered urls
"""
urls, count_dict = filter_urls(
self.kwargs.get("region_slug"), self.kwargs.get("url_filter")
self.kwargs.get("region_slug"),
self.kwargs.get("url_filter"),
prefetch_region_links=True,
)
self.extra_context.update(count_dict)
return urls
Expand All @@ -102,7 +104,9 @@ def dispatch(self, request: HttpRequest, *args: Any, **kwargs: Any) -> HttpRespo
if edit_url_id := kwargs.pop("url_id", None):
region = request.region.slug if request.region else None
try:
self.instance = get_urls(region, url_ids=[edit_url_id])[0]
self.instance = get_urls(
region, url_ids=[edit_url_id], prefetch_region_links=True
)[0]
except IndexError as e:
raise Http404("This URL does not exist") from e
if request.POST:
Expand Down

0 comments on commit b67f358

Please sign in to comment.