Skip to content

Commit

Permalink
CSV export optimisation (#2755)
Browse files Browse the repository at this point in the history
* add utility to update xform export repeat columns from instance

* refactor code

* reset repeat_instances[repeat] if incoming repeat max is greater

* update code comments

* refactor code

* refactor code

* refactor comment

* refactor code

* update code comment

* register repeats on Instance create or update

* add tests

* add test

* disregard form version when registering repeats

* use registered repeats when generating CSV export

* fix registered repeats invalid export

* enhance use of registered repeats to generate CSV

* handle missing repeats in registered repeats

* refactor code

* refactor code

* consider previous submissions when creating new register

* refactor code

* create repeat register if Instance has no repeats

* enhance tests

* fix cyclic dependency error

* fix lint warning

fix unnecessary-dict-index-lookup

* suppress lint warning

suppress too-many-lines

* do not fall back to data if repeat unregistered

* rename export_repeat_columns

* fix cyclic dependency

* refactor code

* refactor code

* fix flaky test

* fix failing tests

* refactor test

* refactor test

* remove lint warning ignore

* rename symbol

* refactor code

* refactor code

* fix flaky test

* add doc strings

* add doc strings

* fix failing tests

* do not register repeat if instance not approved

* refactor code

* rename symbol

* rename symbol

* feat: add export_data management command

* add codetiming dependency

* address lint warning

* fix lint warning line too long

* add missing class docstring

* fix incorrect command output

---------

Co-authored-by: Ukang'a Dickson <[email protected]>
  • Loading branch information
kelvin-muchiri and ukanga authored Jan 15, 2025
1 parent 03436fa commit f9caeb5
Show file tree
Hide file tree
Showing 18 changed files with 1,170 additions and 64 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,5 @@ tags
.eggs
sonar-project.properties
.scannerwork

*.er
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
"""
Test /orgs API endpoint implementation.
"""

import json
from builtins import str as text
from unittest.mock import patch

from django.contrib.auth.models import User, timezone, AnonymousUser
from django.contrib.auth.models import AnonymousUser, User, timezone
from django.core.cache import cache
from django.test.utils import override_settings

Expand All @@ -15,8 +16,8 @@

from onadata.apps.api.models.organization_profile import (
OrganizationProfile,
get_organization_members_team,
Team,
get_organization_members_team,
)
from onadata.apps.api.tests.viewsets.test_abstract_viewset import TestAbstractViewSet
from onadata.apps.api.tools import (
Expand Down Expand Up @@ -840,8 +841,13 @@ def test_add_members_to_owner_role(self):
request = self.factory.get("/", **self.extra)
response = view(request, user="denoinc")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data["users"][1]["user"], "aboy")
self.assertEqual(response.data["users"][1]["role"], "owner")
aboy_data = {"user": "aboy", "role": "owner"}
self.assertTrue(
any( # Order doesn't matter. aboy can be first or last
all(item.get(key) == value for key, value in aboy_data.items())
for item in response.data["users"]
)
)

owner_team = get_or_create_organization_owners_team(self.organization)

Expand Down
48 changes: 48 additions & 0 deletions onadata/apps/logger/management/commands/export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""
Management command to export data from a form in CSV format.
"""

from django.core.management.base import BaseCommand

from codetiming import Timer

from onadata.apps.logger.models.xform import XForm
from onadata.apps.viewer.models.export import Export
from onadata.libs.utils.export_tools import generate_export


class Command(BaseCommand):
"""Export data from a form in CSV format"""

help = "Exports data from a form in CSV format"

def add_arguments(self, parser):
parser.add_argument("form_id", type=int)

def handle(self, *args: str, **options: str):
self.stdout.write(self.style.SUCCESS("Exporting ..."))
form_id = options["form_id"]
try:
xform = XForm.objects.get(pk=form_id)
except XForm.DoesNotExist:
self.stderr.write(
self.style.ERROR(f"There is no form with id {form_id} present.")
)
else:
with Timer() as timer:
export = generate_export(Export.CSV_EXPORT, xform)
elapsed_time = timer.last
msg = (
f"The file {export.full_filepath} was exported in "
f"{elapsed_time:.2f} seconds."
)
self.stdout.write(self.style.NOTICE(msg))
plural_or_singular = (
"submission" if xform.num_of_submissions == 1 else "submissions"
)
msg = (
f"{export.pk}: Exporting {xform.num_of_submissions} "
f'{plural_or_singular} of the form "{xform.title}"'
)
self.stdout.write(self.style.SUCCESS(msg))
21 changes: 19 additions & 2 deletions onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"""
Instance model class
"""
# pylint: disable=too-many-lines

import importlib
import math
import sys
from datetime import datetime
Expand All @@ -18,10 +21,10 @@
from django.urls import reverse
from django.utils import timezone
from django.utils.translation import gettext as _
from multidb.pinning import use_master

from celery import current_task
from deprecated import deprecated
from multidb.pinning import use_master
from taggit.managers import TaggableManager

from onadata.apps.logger.models.submission_review import SubmissionReview
Expand All @@ -35,11 +38,11 @@
from onadata.celeryapp import app
from onadata.libs.data.query import get_numeric_fields
from onadata.libs.utils.cache_tools import (
PROJECT_DATE_MODIFIED_CACHE,
DATAVIEW_COUNT,
IS_ORG,
PROJ_NUM_DATASET_CACHE,
PROJ_SUB_DATE_CACHE,
PROJECT_DATE_MODIFIED_CACHE,
XFORM_COUNT,
XFORM_DATA_VERSIONS,
XFORM_SUBMISSION_COUNT_FOR_DAY,
Expand Down Expand Up @@ -875,6 +878,14 @@ def permanently_delete_attachments(sender, instance=None, created=False, **kwarg
)


@use_master
def register_export_repeats(sender, instance, created=False, **kwargs):
# Avoid cyclic dependency errors
logger_tasks = importlib.import_module("onadata.apps.logger.tasks")

logger_tasks.register_instance_export_repeats_async.delay(instance.pk)


post_save.connect(
post_save_submission, sender=Instance, dispatch_uid="post_save_submission"
)
Expand All @@ -891,6 +902,12 @@ def permanently_delete_attachments(sender, instance=None, created=False, **kwarg
dispatch_uid="permanently_delete_attachments",
)

post_save.connect(
register_export_repeats,
sender=Instance,
dispatch_uid="register_export_repeats",
)


class InstanceHistory(models.Model, InstanceBaseClass):
"""Stores deleted submission XML to maintain a history of edits."""
Expand Down
63 changes: 45 additions & 18 deletions onadata/apps/logger/tasks.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
# pylint: disable=import-error,ungrouped-imports
"""
Asynchronous tasks for the logger app
"""

import logging

from django.core.cache import cache
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.db import DatabaseError

from multidb.pinning import use_master

from onadata.apps.logger.models import Entity, EntityList, Project
from onadata.apps.logger.models import Entity, EntityList, Instance, Project, XForm
from onadata.celeryapp import app
from onadata.libs.utils.cache_tools import (
PROJECT_DATE_MODIFIED_CACHE,
safe_delete,
)
from onadata.libs.utils.project_utils import set_project_perms_to_object
from onadata.libs.utils.cache_tools import PROJECT_DATE_MODIFIED_CACHE, safe_delete
from onadata.libs.utils.logger_tools import (
commit_cached_elist_num_entities,
dec_elist_num_entities,
inc_elist_num_entities,
register_instance_export_repeats,
register_xform_export_repeats,
soft_delete_entities_bulk,
)

from onadata.libs.utils.project_utils import set_project_perms_to_object

logger = logging.getLogger(__name__)
User = get_user_model()
Expand All @@ -32,8 +31,7 @@
def set_entity_list_perms_async(entity_list_id):
"""Set permissions for EntityList asynchronously
Args:
pk (int): Primary key for EntityList
:param entity_list_id: Primary key for EntityList
"""
with use_master:
try:
Expand Down Expand Up @@ -67,9 +65,8 @@ def apply_project_date_modified_async():
def delete_entities_bulk_async(entity_pks: list[int], username: str | None = None):
"""Delete Entities asynchronously
Args:
entity_pks (list(int)): Primary keys of Entities to be deleted
username (str): Username of the user initiating the delete
:param entity_pks: Primary keys of Entities to be deleted
:param username: Username of the user initiating the delete operation
"""
with use_master:
entity_qs = Entity.objects.filter(pk__in=entity_pks, deleted_at__isnull=True)
Expand Down Expand Up @@ -104,8 +101,7 @@ def commit_cached_elist_num_entities_async():
def inc_elist_num_entities_async(elist_pk: int):
"""Increment EntityList `num_entities` counter asynchronously
Args:
elist_pk (int): Primary key for EntityList
:param elist_pk: Primary key for EntityList
"""
inc_elist_num_entities(elist_pk)

Expand All @@ -114,7 +110,38 @@ def inc_elist_num_entities_async(elist_pk: int):
def dec_elist_num_entities_async(elist_pk: int) -> None:
"""Decrement EntityList `num_entities` counter asynchronously
Args:
elist_pk (int): Primary key for EntityList
:param elist_pk: Primary key for EntityList
"""
dec_elist_num_entities(elist_pk)


@app.task(retry_backoff=3, autoretry_for=(DatabaseError, ConnectionError))
def register_instance_export_repeats_async(instance_pk: int) -> None:
"""Register export repeats asynchronously
:param instance_pk: Primary key for Instance
"""
try:
instance = Instance.objects.get(pk=instance_pk)

except Instance.DoesNotExist as exc:
logger.exception(exc)

else:
register_instance_export_repeats(instance)


@app.task(retry_backoff=3, autoretry_for=(DatabaseError, ConnectionError))
def register_xform_export_repeats_async(xform_id: int) -> None:
"""Register export repeats for an XForm asynchronously
:param xform_id: Primary key for XForm
"""
try:
xform = XForm.objects.get(pk=xform_id)

except XForm.DoesNotExist as exc:
logger.exception(exc)

else:
register_xform_export_repeats(xform)
50 changes: 50 additions & 0 deletions onadata/apps/logger/tests/management/commands/test_export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""
Tests for the onadata.apps.logger.management.commands.export_data module.
"""

from io import StringIO

from django.core.management import call_command
from django.core.management.base import CommandError

from onadata.apps.main.tests.test_base import TestBase
from onadata.apps.viewer.models.export import Export


class ExportDataTest(TestBase):
"""Tests for the export_data management command."""

def test_command_output(self):
"""Test the output of the export_data management command."""
output = StringIO()
error_output = StringIO()
with self.assertRaisesMessage(
CommandError,
expected_message="Error: the following arguments are required: form_id",
):
_ = call_command("export_data", stdout=output)

_ = call_command("export_data", 12300, stdout=output, stderr=error_output)
self.assertIn("Exporting ...", output.getvalue())
self.assertIn(
"There is no form with id 12300 present.", error_output.getvalue()
)
self._publish_transportation_form_and_submit_instance()
export_count = Export.objects.filter().count()
_ = call_command(
"export_data", self.xform.pk, stdout=output, stderr=error_output
)
self.assertIn(
f'Exporting 1 submission of the form "{self.xform.title}"',
output.getvalue(),
)
# confirm a new export record was created.
self.assertEqual(Export.objects.filter().count(), export_count + 1)
export = Export.objects.filter(
xform_id=self.xform.pk, export_type=Export.CSV_EXPORT
).latest("created_on")
self.assertIn(
f"The file {export.full_filepath} was exported in",
output.getvalue(),
)
Loading

0 comments on commit f9caeb5

Please sign in to comment.