Commit
feat(backend): endpoint to create additional Entity lists on a project (#1799)

* fix(frontend): download xlsform with .xlsx format (updated from .xls)

* fix(backend): fix javarosa conversion for all geom types (LineString)

* refactor(backend): restructure code related to entity creation

* feat(backend): add endpoint /additional-entity for a project

* refactor(backend): rename features_geojson param --> geojson

* docs: add note to /additional-entity about filename usage

* fix(backend): correct error message if coordinates are invalid

* test(backend): fix test for /projects/task-split endpoint

* test(frontend): only select playwright test project to avoid errors

* refactor: add traceback info to generate project files exceptions

* fix(backend): geojson geom --> javarosa format for all types
spwoodcock authored Sep 21, 2024
1 parent bd1276b commit f6efdd6
Showing 11 changed files with 281 additions and 146 deletions.
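
As a rough illustration of the headline feature, the sketch below uploads a GeoJSON file to the new per-project endpoint. The host, full route path, and response handling are assumptions inferred only from the commit messages above (an /additional-entity endpoint, an upload parameter renamed to geojson, and a note that the uploaded filename matters); the actual route definition is not part of this diff.

# Hypothetical client call to the new /additional-entity endpoint.
# Route prefix, host, and auth are placeholders; only the endpoint name and
# the "geojson" field name come from the commit messages.
import requests

project_id = 123  # placeholder project ID
url = f"https://fmtm.example.org/projects/{project_id}/additional-entity"

with open("buildings.geojson", "rb") as f:
    # The uploaded filename is reportedly significant (see the docs commit),
    # so give the file a meaningful name.
    resp = requests.post(url, files={"geojson": ("buildings.geojson", f)})

resp.raise_for_status()
print(resp.status_code)
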
78 changes: 77 additions & 1 deletion src/backend/app/central/central_crud.py
@@ -19,6 +19,7 @@

import csv
import json
from asyncio import gather
from io import BytesIO, StringIO
from typing import Optional, Union

@@ -32,7 +33,7 @@
from sqlalchemy import text
from sqlalchemy.orm import Session

from app.central import central_deps
from app.central import central_deps, central_schemas
from app.config import encrypt_value, settings
from app.db.postgis_utils import (
geojson_to_javarosa_geom,
@@ -519,6 +520,81 @@ async def convert_odk_submission_json_to_geojson(
return geojson.FeatureCollection(features=all_features)


async def feature_geojson_to_entity_dict(
feature: geojson.Feature,
) -> central_schemas.EntityDict:
"""Convert a single GeoJSON to an Entity dict for upload."""
if not isinstance(feature, (dict, geojson.Feature)):
log.error(f"Feature not in correct format: {feature}")
raise ValueError(f"Feature not in correct format: {type(feature)}")

feature_id = feature.get("id")

geometry = feature.get("geometry", {})
if not geometry:
msg = "'geometry' data field is mandatory"
log.debug(msg)
raise ValueError(msg)

javarosa_geom = await geojson_to_javarosa_geom(geometry)

# NOTE all properties MUST be string values for Entities, convert
properties = {
str(key): str(value) for key, value in feature.get("properties", {}).items()
}
# Set to TaskStatus enum READY value (0)
properties["status"] = "0"

task_id = properties.get("task_id")
entity_label = f"Task {task_id} Feature {feature_id}"

return {"label": entity_label, "data": {"geometry": javarosa_geom, **properties}}


async def task_geojson_dict_to_entity_values(
task_geojson_dict: dict[int, geojson.Feature],
) -> list[central_schemas.EntityDict]:
"""Convert a dict of task GeoJSONs into data for ODK Entity upload."""
log.debug("Converting dict of task GeoJSONs to Entity upload format")

asyncio_tasks = []
for _, geojson_dict in task_geojson_dict.items():
# Extract the features list and pass each Feature through
features = geojson_dict.get("features", [])
asyncio_tasks.extend(
[feature_geojson_to_entity_dict(feature) for feature in features if feature]
)

return await gather(*asyncio_tasks)


async def create_entity_list(
odk_creds: project_schemas.ODKCentralDecrypted,
odk_id: int,
dataset_name: str = "features",
properties: list[str] = None,
entities_list: list[central_schemas.EntityDict] = None,
) -> None:
"""Create a new Entity list in ODK."""
if properties is None:
# Get the default properties for FMTM project
properties = central_schemas.entity_fields_to_list()
log.debug(f"Using default FMTM properties for Entity creation: {properties}")

async with central_deps.get_odk_dataset(odk_creds) as odk_central:
# Step 1: create the Entity list, with properties
await odk_central.createDataset(
odk_id, datasetName=dataset_name, properties=properties
)
# Step 2: populate the Entities
if entities_list:
await odk_central.createEntities(
odk_id,
dataset_name,
entities_list,
)
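
Taken together, the two conversion helpers plus create_entity_list above form a small pipeline for pushing extra features into ODK Central. A minimal sketch of how they could be chained, assuming credentials and IDs are supplied by the caller (the dataset name here is hypothetical; it defaults to "features"):

from app.central.central_crud import (
    create_entity_list,
    task_geojson_dict_to_entity_values,
)

async def upload_extra_features(odk_creds, odk_id: int, task_geojson_dict: dict):
    # Convert each task's GeoJSON features into Entity label/data dicts
    entities = await task_geojson_dict_to_entity_values(task_geojson_dict)
    # Create a new Entity list and populate it in one go
    await create_entity_list(
        odk_creds,
        odk_id,
        dataset_name="extra_features",  # hypothetical list name
        entities_list=entities,
    )
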


async def get_entities_geojson(
odk_creds: project_schemas.ODKCentralDecrypted,
odk_id: int,
45 changes: 44 additions & 1 deletion src/backend/app/central/central_schemas.py
@@ -17,8 +17,9 @@
#
"""Schemas for returned ODK Central objects."""

from dataclasses import dataclass
from enum import Enum
from typing import Optional
from typing import Optional, TypedDict

from geojson_pydantic import Feature, FeatureCollection
from pydantic import BaseModel, Field, ValidationInfo, computed_field
@@ -27,6 +28,48 @@
from app.models.enums import TaskStatus


@dataclass
class NameTypeMapping:
"""A simple dataclass mapping field name to field type."""

name: str
type: str


ENTITY_FIELDS: list[NameTypeMapping] = [
NameTypeMapping(name="geometry", type="geopoint"),
NameTypeMapping(name="project_id", type="string"),
NameTypeMapping(name="task_id", type="string"),
NameTypeMapping(name="osm_id", type="string"),
NameTypeMapping(name="tags", type="string"),
NameTypeMapping(name="version", type="string"),
NameTypeMapping(name="changeset", type="string"),
NameTypeMapping(name="timestamp", type="datetime"),
NameTypeMapping(name="status", type="string"),
]


def entity_fields_to_list() -> list[str]:
"""Converts a list of Field objects to a list of field names."""
return [field.name for field in ENTITY_FIELDS]


# Dynamically generate EntityPropertyDict using ENTITY_FIELDS
def create_entity_property_dict() -> dict[str, type]:
"""Dynamically create a TypedDict using the defined fields."""
return {field.name: str for field in ENTITY_FIELDS}


EntityPropertyDict = TypedDict("EntityPropertyDict", create_entity_property_dict())


class EntityDict(TypedDict):
"""Dict of Entity label and data."""

label: str
data: EntityPropertyDict
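
For reference, an EntityDict built from these fields looks like the sketch below (values are illustrative; every property value must be a string, matching the conversion done in central_crud.feature_geojson_to_entity_dict):

from app.central.central_schemas import EntityDict, entity_fields_to_list

# The default property names used when creating an Entity list
print(entity_fields_to_list())
# ['geometry', 'project_id', 'task_id', 'osm_id', 'tags', 'version',
#  'changeset', 'timestamp', 'status']

example_entity: EntityDict = {
    "label": "Task 1 Feature 42",
    "data": {
        "geometry": "27.7 85.3 0.0 0.0",  # JavaRosa geometry string
        "project_id": "1",
        "task_id": "1",
        "osm_id": "42",
        "tags": "{}",
        "version": "1",
        "changeset": "100",
        "timestamp": "2024-09-21T00:00:00Z",
        "status": "0",  # TaskStatus READY
    },
}
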


class CentralBase(BaseModel):
"""ODK Central return."""

123 changes: 55 additions & 68 deletions src/backend/app/db/postgis_utils.py
@@ -19,7 +19,6 @@

import json
import logging
from asyncio import gather
from datetime import datetime, timezone
from io import BytesIO
from random import getrandbits
@@ -84,7 +83,7 @@ def featcol_to_wkb_geom(
features = featcol.get("features", [])

if len(features) > 1 and features[0].get("type") == "MultiPolygon":
featcol = multipolygon_to_polygon(featcol)
featcol = multigeom_to_singlegeom(featcol)
features = featcol.get("features", [])

geometry = features[0].get("geometry")
@@ -347,7 +346,7 @@ async def split_geojson_by_task_areas(
log.error("Attempted geojson task splitting failed")
return None

if feature_collections:
if feature_collections and len(feature_collections[0]) > 1:
# NOTE the feature collections are nested in a tuple, first remove
task_geojson_dict = {
record[0]: geojson.loads(json.dumps(record[1]))
@@ -407,11 +406,11 @@ def add_required_geojson_properties(


def normalise_featcol(featcol: geojson.FeatureCollection) -> geojson.FeatureCollection:
"""Normalise a FeatureCollection into a standadised format.
"""Normalise a FeatureCollection into a standardised format.
The final FeatureCollection will only contain:
- Polygon
- Polyline
- LineString
- Point
Processed:
@@ -441,7 +440,7 @@ def normalise_featcol(featcol: geojson.FeatureCollection) -> geojson.FeatureColl
coords.pop()

# Convert MultiPolygon type --> individual Polygons
return multipolygon_to_polygon(featcol)
return multigeom_to_singlegeom(featcol)


def geojson_to_featcol(geojson_obj: dict) -> geojson.FeatureCollection:
@@ -498,7 +497,7 @@ def featcol_keep_dominant_geom_type(

def get_featcol_dominant_geom_type(featcol: geojson.FeatureCollection) -> str:
"""Get the predominant geometry type in a FeatureCollection."""
geometry_counts = {"Polygon": 0, "Point": 0, "Polyline": 0}
geometry_counts = {"Polygon": 0, "Point": 0, "LineString": 0}

for feature in featcol.get("features", []):
geometry_type = feature.get("geometry", {}).get("type", "")
@@ -552,6 +551,10 @@ def is_valid_coordinate(coord):
first_coordinate = coordinates
coordinates = coordinates[0]

error_message = (
"ERROR: The coordinates within the GeoJSON file are not valid. "
"Is the file empty?"
)
if not is_valid_coordinate(first_coordinate):
log.error(error_message)
raise HTTPException(status_code=400, detail=error_message)
@@ -629,20 +632,30 @@ async def geojson_to_javarosa_geom(geojson_geometry: dict) -> str:
if geojson_geometry is None:
return ""

coordinates = []
if geojson_geometry["type"] in ["Point", "LineString", "MultiPoint"]:
coordinates = [[geojson_geometry.get("coordinates", [])]]
elif geojson_geometry["type"] in ["Polygon", "MultiLineString"]:
coordinates = geojson_geometry.get("coordinates", [])
elif geojson_geometry["type"] == "MultiPolygon":
# Flatten the list structure to get coordinates of all polygons
coordinates = sum(geojson_geometry.get("coordinates", []), [])
coordinates = geojson_geometry.get("coordinates", [])
geometry_type = geojson_geometry["type"]

# Normalise single geometries into the same structure as multi-geometries
# We end up with three levels of nesting for the processing below
if geometry_type == "Point":
# Format [x, y]
coordinates = [[coordinates]]
elif geometry_type in ["LineString", "MultiPoint"]:
# Format [[x, y], [x, y]]
coordinates = [coordinates]
elif geometry_type in ["Polygon", "MultiLineString"]:
# Format [[[x, y], [x, y]]]
pass
elif geometry_type == "MultiPolygon":
# Format [[[[x, y], [x, y]]]], flatten coords
coordinates = [coord for poly in coordinates for coord in poly]
else:
raise ValueError("Unsupported GeoJSON geometry type")
raise ValueError(f"Unsupported GeoJSON geometry type: {geometry_type}")

# Prepare the JavaRosa format by iterating over coordinates
javarosa_geometry = []
for polygon in coordinates:
for lon, lat in polygon:
for polygon_or_line in coordinates:
for lon, lat in polygon_or_line:
javarosa_geometry.append(f"{lat} {lon} 0.0 0.0")

return ";".join(javarosa_geometry)
@@ -664,7 +677,7 @@ async def javarosa_to_geojson_geom(javarosa_geom_string: str, geom_type: str) ->
if geom_type == "Point":
lat, lon, _, _ = map(float, javarosa_geom_string.split())
geojson_geometry = {"type": "Point", "coordinates": [lon, lat]}
elif geom_type == "Polyline":
elif geom_type == "LineString":
coordinates = [
[float(coord) for coord in reversed(point.split()[:2])]
for point in javarosa_geom_string.split(";")
@@ -685,56 +698,26 @@ async def javarosa_to_geojson_geom(javarosa_geom_string: str, geom_type: str) ->
return geojson_geometry


async def feature_geojson_to_entity_dict(
feature: dict,
) -> dict:
"""Convert a single GeoJSON to an Entity dict for upload."""
feature_id = feature.get("id")

geometry = feature.get("geometry", {})
if not geometry:
msg = "'geometry' data field is mandatory"
log.debug(msg)
raise ValueError(msg)

javarosa_geom = await geojson_to_javarosa_geom(geometry)

# NOTE all properties MUST be string values for Entities, convert
properties = {
str(key): str(value) for key, value in feature.get("properties", {}).items()
}
# Set to TaskStatus enum READY value (0)
properties["status"] = "0"

task_id = properties.get("task_id")
entity_label = f"Task {task_id} Feature {feature_id}"

return {"label": entity_label, "data": {"geometry": javarosa_geom, **properties}}


async def task_geojson_dict_to_entity_values(task_geojson_dict):
"""Convert a dict of task GeoJSONs into data for ODK Entity upload."""
asyncio_tasks = []
for _, geojson_dict in task_geojson_dict.items():
features = geojson_dict.get("features", [])
asyncio_tasks.extend(
[feature_geojson_to_entity_dict(feature) for feature in features if feature]
)

return await gather(*asyncio_tasks)


def multipolygon_to_polygon(
def multigeom_to_singlegeom(
featcol: geojson.FeatureCollection,
) -> geojson.FeatureCollection:
"""Converts a GeoJSON FeatureCollection of MultiPolygons to Polygons.
"""Converts any Multi(xxx) geometry types to individual geometries.
Args:
featcol : A GeoJSON FeatureCollection containing MultiPolygons/Polygons.
featcol : A GeoJSON FeatureCollection of geometries.
Returns:
geojson.FeatureCollection: A GeoJSON FeatureCollection containing Polygons.
geojson.FeatureCollection: A GeoJSON FeatureCollection containing
single geometry types only: Polygon, LineString, Point.
"""

def split_multigeom(geom, properties):
"""Splits multi-geometries into individual geometries."""
return [
geojson.Feature(geometry=mapping(single_geom), properties=properties)
for single_geom in geom.geoms
]

final_features = []

for feature in featcol.get("features", []):
@@ -745,12 +728,16 @@ def multipolygon_to_polygon(
log.warning(f"Geometry is not valid, so was skipped: {feature['geometry']}")
continue

if geom.geom_type == "Polygon":
final_features.append(geojson.Feature(geometry=geom, properties=properties))
elif geom.geom_type == "MultiPolygon":
final_features.extend(
geojson.Feature(geometry=polygon_coords, properties=properties)
for polygon_coords in geom.geoms
if geom.geom_type.startswith("Multi"):
# Handle all MultiXXX types
final_features.extend(split_multigeom(geom, properties))
else:
# Handle single geometry types
final_features.append(
geojson.Feature(
geometry=mapping(geom),
properties=properties,
)
)

return geojson.FeatureCollection(final_features)
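
A quick sketch of the renamed helper in action, splitting one MultiPolygon feature into two Polygon features. Geometries are illustrative, and it assumes the unchanged preamble of the function converts each feature geometry via shapely, as the geom_type/geoms usage above implies:

import geojson
from app.db.postgis_utils import multigeom_to_singlegeom

featcol = geojson.FeatureCollection([
    geojson.Feature(
        geometry={
            "type": "MultiPolygon",
            "coordinates": [
                [[[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [0.0, 0.0]]],
                [[[2.0, 2.0], [2.0, 3.0], [3.0, 3.0], [2.0, 2.0]]],
            ],
        },
        properties={"osm_id": "1"},
    )
])

single = multigeom_to_singlegeom(featcol)
print(len(single["features"]))                    # 2 -> one feature per part
print(single["features"][0]["geometry"]["type"])  # Polygon, properties copied
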
4 changes: 2 additions & 2 deletions src/backend/app/helpers/helper_routes.py
@@ -48,7 +48,7 @@
add_required_geojson_properties,
featcol_keep_dominant_geom_type,
javarosa_to_geojson_geom,
multipolygon_to_polygon,
multigeom_to_singlegeom,
parse_geojson_file_to_featcol,
)
from app.models.enums import GeometryType, HTTPStatus, XLSFormType
@@ -264,7 +264,7 @@ async def flatten_multipolygons_to_polygons(
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY, detail="No geometries present"
)
multi_to_single_polygons = multipolygon_to_polygon(featcol)
multi_to_single_polygons = multigeom_to_singlegeom(featcol)

if multi_to_single_polygons:
headers = {