Skip to content

Commit

Permalink
Merge pull request #58 from mjanez/latest
Browse files Browse the repository at this point in the history
Fix - Refactor JSON template rendering and improve error handling (ISO19139-GeoDCAT-AP)
  • Loading branch information
mjanez authored Oct 30, 2024
2 parents ee7692b + c400007 commit b20d86e
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 59 deletions.
File renamed without changes.
21 changes: 21 additions & 0 deletions ckan-pycsw/Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@ ENV PYCSW_DEV_PORT=5678
ENV TIMEOUT=300
ENV SSL_UNVERIFIED_MODE=False

# PYCSW Catalog configuration
# NOTE(review): CKAN_SYSADMIN_EMAIL and CKAN_URL are expected to be defined by an
# earlier ENV/ARG instruction; otherwise they expand to an empty string - confirm.
ENV CSW_IDENTIFICATION_TITLE="Sample Geospatial Catalogue" \
    CSW_IDENTIFICATION_ABSTRACT="OGC CSW server powered by pycsw" \
    CSW_PROVIDER_NAME="ckan-docker development team" \
    CSW_PROVIDER_URL="https://github.com/mjanez/ckan-docker" \
    CSW_CONTACT_NAME="ckan-docker development team" \
    CSW_CONTACT_POSITION="Site Administrator" \
    CSW_CONTACT_ADDRESS="ckan-docker development team" \
    CSW_CONTACT_CITY="Madrid" \
    CSW_CONTACT_STATE_OR_PROVINCE="Madrid" \
    CSW_CONTACT_POSTAL_CODE="28001" \
    CSW_CONTACT_COUNTRY="Spain" \
    CSW_CONTACT_EMAIL=${CKAN_SYSADMIN_EMAIL} \
    CSW_CONTACT_URL=${CKAN_URL} \
    CSW_INSPIRE_DATE="2024-01-01" \
    CSW_INSPIRE_GEMET_KEYWORDS="Utility and governmental services" \
    CSW_INSPIRE_CONFORMITY="notEvaluated" \
    CSW_INSPIRE_CONTACT_EMAIL=${CKAN_SYSADMIN_EMAIL} \
    CSW_INSPIRE_TEMP_EXTENT="2024-01-01/2024-12-31"

# Variable substitution inside a single ENV instruction uses the values that
# existed *before* that instruction, so a variable defined above cannot be
# referenced in the same ENV. Set the derived value in its own instruction.
ENV CSW_INSPIRE_CONTACT_NAME="${CSW_CONTACT_NAME}"

WORKDIR ${APP_DIR}

# Update files if needed
Expand Down
20 changes: 10 additions & 10 deletions ckan2pycsw/ckan2pycsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from schemas.pygeometa.iso19139_inspire import ISO19139_inspireOutputSchema

# debug
import ptvsd
#import ptvsd

# Environment variables
TZ = os.environ.get("TZ", "TZ")
Expand Down Expand Up @@ -231,14 +231,14 @@ def run_tasks():
logging.error(f"{log_module}:ckan2pycsw | Error starting gunicorn: {e}")

if __name__ == "__main__":
if str(DEV_MODE).lower() == "true":
# Allow other computers to attach to ptvsd at this IP address and port.
ptvsd.enable_attach(address=("0.0.0.0", PYCSW_DEV_PORT), redirect_output=True)

# Pause the program until a remote debugger is attached
ptvsd.wait_for_attach()
main()
# Launch a cronjob
else:
# if str(DEV_MODE).lower() == "true":
# # Allow other computers to attach to ptvsd at this IP address and port.
# ptvsd.enable_attach(address=("0.0.0.0", PYCSW_DEV_PORT), redirect_output=True)

# # Pause the program until a remote debugger is attached
# ptvsd.wait_for_attach()
# main()
# # Launch a cronjob
# else:
run_tasks()
run_scheduler()
19 changes: 15 additions & 4 deletions ckan2pycsw/model/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,18 @@ def render_j2_template(mcf: dict, schema_type: str, url: str = None, template_di
mcf = update_object_lists(mcf)

try:
# Render the template and directly attempt to correct and deserialize the JSON string
mcf_dict = json.loads(re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', template.render(record=mcf)), strict=False)
# Render the template
rendered_template = template.render(record=mcf)
# Clean trailing commas
cleaned_template = clean_trailing_commas(rendered_template)
# Escape backslashes
escaped_template = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', cleaned_template)
# Deserialize the JSON string
mcf_dict = json.loads(escaped_template, strict=False)
except json.JSONDecodeError as e:
LOGGER.error("Error deserializing the template output: %s", e)
# Optionally: Save the problematic output for debugging
LOGGER.error("Problematic output: %s", template.render(record=mcf))
LOGGER.error("Problematic output: %s", rendered_template)
raise

return mcf_dict
Expand Down Expand Up @@ -753,4 +759,9 @@ def get_localized_dataset_value(multilang_value, default_language, languages=Non
if language in multilang_value:
localized_value[language] = multilang_value[language]

return localized_value
return localized_value

def clean_trailing_commas(json_string):
    """Remove trailing commas that precede a closing ``}`` or ``]``.

    Rendered templates may leave a dangling comma after the last member of an
    object or array, which ``json.loads`` rejects. Only commas that sit
    outside of JSON string literals are removed, so text such as
    ``"a, ]"`` inside a value is left untouched.

    Args:
        json_string: The JSON-like text to clean.

    Returns:
        The text with structural trailing commas removed.
    """
    def _keep_or_strip(match):
        # Group 1 is a complete string literal: return it unchanged so that
        # comma/bracket sequences inside values are never rewritten.
        string_literal = match.group(1)
        if string_literal is not None:
            return string_literal
        # Otherwise this is a structural trailing comma: keep only the bracket.
        return match.group(2)

    # First alternative consumes whole string literals (including escaped
    # quotes); second alternative matches a comma followed by a closer.
    # DOTALL lets a backslash escape span a raw newline inside a string.
    pattern = r'("(?:\\.|[^"\\])*")|,\s*([}\]])'
    return re.sub(pattern, _keep_or_strip, json_string, flags=re.DOTALL)
132 changes: 87 additions & 45 deletions ckan2pycsw/schemas/ckan/iso19139_geodcatap/main.j2
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
{# render_field: emit one JSON member of the form "<field_name>": "<field_value>". Both arguments are interpolated between double quotes; the macro itself applies no JSON escaping. -#}
{% macro render_field(field_name, field_value) %}
"{{ field_name }}": "{{ field_value }}"
{% endmacro %}

{# render_object: emit a JSON object from an iterable of (name, value) pairs, rendering each pair with render_field and placing a comma after every member except the last. -#}
{% macro render_object(fields) %}
{
{%- for field in fields %}
{{ render_field(field[0], field[1]) }}{% if not loop.last %},{% endif %}
{%- endfor %}
}
{% endmacro %}

{
{# INSPIRE ISO19139 Metadata Schema #}
{% set language_iso19115 = record['language']|get_mapping_value_from_yaml_list(input_field="uri", output_field='iso_639_2', codelist="language",mappings_folder=mappings_folder + "/ckan_geodcatap") %}
Expand All @@ -8,36 +20,52 @@
{% set dcat_type = record['dcat_type'].rsplit('/', 1)[-1] %}
"mcf": {"version": 1.0},
"metadata": {
{%- set metadata_fields = [] %}

{% if record['identifier'] %}
"identifier": "{{ record['identifier'] }}",
{%- set _ = metadata_fields.append(('identifier', record['identifier'])) %}
{% else %}
"identifier": "{{ record['id'] }}",
{%- set _ = metadata_fields.append(('identifier', record['id'])) %}
{% endif %}
"language": "{{ language_2code }}",

{%- set _ = metadata_fields.append(('language', language_2code)) %}

{% if language_alternate %}
"language_alternate": "{{ language_alternate }}",
{%- set _ = metadata_fields.append(('language_alternate', language_alternate)) %}
{% endif %}
"charset": "UTF-8",

{%- set _ = metadata_fields.append(('charset', 'UTF-8')) %}

{% if record['source'] %}
"parentidentifier": "{{ record['source'].rsplit('/', 1)[-1] }}",
{%- set parent_id = record['source'].rsplit('/', 1)[-1] %}
{%- set _ = metadata_fields.append(('parentidentifier', parent_id)) %}
{% endif %}
"datestamp": "{{ record['metadata_modified']|normalize_datetime }}",
"dataseturi": "{{ url }}",

{%- set _ = metadata_fields.append(('datestamp', record['metadata_modified']|normalize_datetime)) %}
{%- set _ = metadata_fields.append(('dataseturi', url)) %}

{% if dcat_type == 'service' %}
{% if "catalog" in record['title'].lower() or "csw" in record['title'].lower() %}
"servicetype": "discovery",
{%- set _ = metadata_fields.append(('servicetype', 'discovery')) %}
{% elif "wfs" in record['title'].lower() or "descarg" in record['title'].lower() %}
"servicetype": "download",
{%- set _ = metadata_fields.append(('servicetype', 'download')) %}
{% elif "wms" in record['title'].lower() or "wmts" in record['title'].lower() or "wcs" in record['title'].lower() or "map" in record['title'].lower() %}
"servicetype": "view",
{%- set _ = metadata_fields.append(('servicetype', 'view')) %}
{% else %}
"servicetype": "other",
{%- set _ = metadata_fields.append(('servicetype', 'other')) %}
{% endif %}
{% endif %}
"hierarchylevel": {
"value": "{{ dcat_type }}",
"uri": "{{ record['dcat_type'] }}"
}

{%- set hierarchylevel = {
"value": dcat_type,
"uri": record['dcat_type']
} %}
{%- set _ = metadata_fields.append(('hierarchylevel', hierarchylevel)) %}

{# Render all metadata fields with appropriate commas #}
{%- for field in metadata_fields %}
"{{ field[0] }}": {% if field[1] is mapping or field[1] is iterable and field[1] is not string %}{{ field[1] | tojson }}{% else %}"{{ field[1] }}" {% endif %}{% if not loop.last %},{% endif %}
{%- endfor %}
},
"spatial": {
{% if record['reference_system'] is defined %}
Expand Down Expand Up @@ -313,37 +341,51 @@
"maintenancefrequency": "continual"
},
"contact": {
{% if record['publisher_name'] is defined %}
"publisher": {
{% if record['publisher_name'] %}
"organization": "{{ record['publisher_name'] }}",
{% endif %}
{% if record['publisher_email'] %}
"email": "{{ record['publisher_email'] }}",
{% endif %}
{% if record['publisher_url'] %}
"url": "{{ record['publisher_url'] }}"
{% endif %}
},
{# Initialize a list for storing contact fields #}
{%- set contact_fields = [] %}

{# Publisher #}
{% if record['publisher_name'] or record['publisher_email'] or record['publisher_url'] %}
{%- set publisher = {} %}
{% if record['publisher_name'] %}
{%- set _ = publisher.update({'organization': record['publisher_name']}) %}
{% endif %}
{% if record['publisher_email'] %}
{%- set _ = publisher.update({'email': record['publisher_email']}) %}
{% endif %}
{% if record['publisher_url'] %}
{%- set _ = publisher.update({'url': record['publisher_url']}) %}
{% endif %}
{%- set _ = contact_fields.append(('publisher', publisher)) %}
{% endif %}
{% if record['author_name'] is defined %}
"author": {
{% if record['author_name'] %}
"individualname": "{{ record['author_name'] }}",
{% endif %}
{% if record['author_email'] %}
"email": "{{ record['author_email'] }}",
{% endif %}
{% if record['author_url'] %}
"url": "{{ record['author_url'] }}"
{% endif %}
},

{# Author #}
{% if record['author_name'] or record['author_email'] or record['author_url'] %}
{%- set author = {} %}
{% if record['author_name'] %}
{%- set _ = author.update({'individualname': record['author_name']}) %}
{% endif %}
{% if record['author_email'] %}
{%- set _ = author.update({'email': record['author_email']}) %}
{% endif %}
{% if record['author_url'] %}
{%- set _ = author.update({'url': record['author_url']}) %}
{% endif %}
{%- set _ = contact_fields.append(('author', author)) %}
{% endif %}
"pointOfContact": {
"organization": "{{ record['contact_name'] }}",
"email": "{{ record['contact_email'] }}",
"url": "{{ record['contact_url'] }}"
}

{# Point of Contact (if always to be included) #}
{%- set pointOfContact = {
"organization": record['contact_name'],
"email": record['contact_email'],
"url": record['contact_url']
} %}
{%- set _ = contact_fields.append(('pointOfContact', pointOfContact)) %}

{# Render all contact fields with appropriate commas #}
{%- for field in contact_fields %}
"{{ field[0] }}": {{ render_object(field[1].items()) }}{% if not loop.last %},{% endif %}
{%- endfor %}
},
"distribution": {
{% for resource in record['resources'] %}
Expand Down

0 comments on commit b20d86e

Please sign in to comment.