Skip to content

Commit

Permalink
Fix CKAN dataset retrieval
Browse files Browse the repository at this point in the history
Update the update_object_lists function to escape JSON special characters
  • Loading branch information
mjanez committed Oct 3, 2023
1 parent 2da1c0f commit 2a5ca63
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 42 deletions.
74 changes: 37 additions & 37 deletions ckan2pycsw/model/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def render_j2_template(mcf: dict, schema_type: str, url: str = None, template_di
'normalize_datestring': normalize_datestring,
'prune_distribution_formats': prune_distribution_formats,
'prune_transfer_option': prune_transfer_option,
'escape_json': escape_json,
}

LOGGER.debug('Evaluating template directory')
Expand All @@ -82,7 +83,7 @@ def render_j2_template(mcf: dict, schema_type: str, url: str = None, template_di

if schema_type == 'ckan':
LOGGER.debug(f'Setting up template environment {template_dir} of type {schema_type}')
env = Environment(loader=FileSystemLoader(os.path.join(SCHEMAS_CKAN, template_dir)))
env = Environment(loader=FileSystemLoader(os.path.join(SCHEMAS_CKAN, template_dir)), autoescape=True)

if template_dir != "iso19139_base":
LOGGER.debug(f'Adding CKAN Schema mapping:{template_dir}')
Expand Down Expand Up @@ -111,14 +112,12 @@ def render_j2_template(mcf: dict, schema_type: str, url: str = None, template_di

LOGGER.debug('Processing CKAN template to JSON')
mcf = update_object_lists(mcf)
json_bytes = template.render(record=mcf).encode('utf-8')

json_str = json_bytes.decode('utf-8')

mcf_dict = json.loads(template.render(record=mcf), strict=False)

#TODO: Delete Dumps to log
#print(json.dumps(json.loads(json_str, strict=False), indent=4, ensure_ascii=False), file=open(APP_DIR + '/log/demo_ckan.json', 'w', encoding='utf-8'))

mcf_dict = json.loads(json_str, strict=False)
#print(json.dumps(mcf_dict, indent=4, ensure_ascii=False), file=open(APP_DIR + '/log/demo_ckan.json', 'w', encoding='utf-8'))

return mcf_dict

if schema_type == 'pygeometa':
Expand Down Expand Up @@ -399,36 +398,27 @@ def scheming_get_object_list(ckan_field, data):
json_data = scheming_clean_json_list(data[ckan_field])
return json_data

# NOTE(review): the page this was captured from dropped indentation and diff
# markers. This looks like the earlier version of update_object_lists; a second
# definition with the same name follows further down -- confirm which is live.
#
# Replaces every string value of `data` -- and every string value of each entry
# in data['resources'], when that key is present -- with process_string(value).
def update_object_lists(data):
# Helper that reads `key` and `data`, which are not its parameters; presumably
# it is nested inside update_object_lists so both resolve from the enclosing
# scope (TODO confirm against the real file).
def process_string(s):
# Text that starts or ends like a JSON array of strings is handed to
# scheming_get_object_list for the current key.
if s.startswith('["') or s.endswith('"]'):
try:
return scheming_get_object_list(key, data)
# Bare except: any failure here is silently ignored.
except:
pass
# Text that starts or ends like a JSON object is parsed with json.loads.
elif s.startswith('{"') or s.endswith('"}'):
try:
return json.loads(s, strict=False)
# Bare except: any failure here is silently ignored.
except:
pass
# A literal empty JSON array becomes an empty string.
elif s == "[]":
return ""
# Backslash-escape newlines, carriage returns and double quotes.
elif '\n' in s or '\r' in s or '"' in s:
s = s.replace('\n', '\\n').replace('\r', '\\r').replace('"', '\\"')
return s.strip()

try:
for key in data:
if isinstance(data[key], str):
data[key] = process_string(data.get(key, ""))
if 'resources' in data:
for resource in data['resources']:
for key in resource:
if isinstance(resource[key], str):
resource[key] = process_string(resource.get(key, ""))
# Bare except: any error raised while walking the record is swallowed.
except:
pass
def process_string(s):
    """
    Normalise a single string value before it is rendered into a JSON template.

    Strings that start or end like a serialised JSON array of strings
    (``["`` / ``"]``) or JSON object (``{"`` / ``"}``) are parsed with
    ``json.loads`` and the resulting Python object is returned. Anything
    else -- including text that merely looks like JSON but fails to parse --
    is returned as the body of its JSON string literal (control characters
    and backslashes escaped, non-ASCII as ``\\uXXXX``) with every double
    quote replaced by a single quote.

    :param s: The string to process.
    :returns: The parsed object for JSON-looking input, otherwise the
        escaped string.
    """
    looks_like_json = s.startswith(('["', '{"')) or s.endswith(('"]', '"}'))
    if looks_like_json:
        try:
            return json.loads(s, strict=False)
        except (ValueError, RecursionError):
            # Best effort: text that only resembles JSON is escaped below.
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input. Anything else (KeyboardInterrupt,
            # SystemExit, ...) is deliberately allowed to propagate.
            pass
    # json.dumps wraps the text in double quotes; [1:-1] drops that wrapper.
    # Escaped quotes (\") become single quotes; the trailing replace is a
    # defensive no-op, since json.dumps escapes every double quote.
    return json.dumps(s)[1:-1].replace('\\"', "'").replace('"', "'")

def update_object_lists(data):
    """
    Run ``process_string`` over the string values of a record, in place.

    Handled shapes:

    * top-level string values;
    * string items of top-level lists;
    * dict items of the top-level ``'resources'`` list (string values of
      each resource);
    * a top-level dict that itself carries a ``'resources'`` list.

    Non-string values are left untouched.

    :param data: The record dictionary to update (mutated in place).
    :returns: The same ``data`` object, for convenience.
    """
    def _process_mapping(mapping):
        # Only existing keys are reassigned, so the dict keeps its size and
        # iterating it while assigning is safe.
        for field, field_value in mapping.items():
            if isinstance(field_value, str):
                mapping[field] = process_string(field_value)

    for key, value in data.items():
        if isinstance(value, str):
            data[key] = process_string(value)
        elif isinstance(value, list):
            for index, item in enumerate(value):
                if isinstance(item, str):
                    value[index] = process_string(item)
                elif key == 'resources' and isinstance(item, dict):
                    # The record's resources arrive as a list of dicts under
                    # the top-level 'resources' key; without this branch
                    # their string fields would never be processed.
                    _process_mapping(item)
        elif isinstance(value, dict) and 'resources' in value:
            for resource in value['resources']:
                # Skip malformed (non-dict) entries instead of raising.
                if isinstance(resource, dict):
                    _process_mapping(resource)
    return data

def update_large_text_lists(data):
Expand Down Expand Up @@ -652,4 +642,14 @@ def pretty_print(xml: str, encoding: str = 'UTF-8') -> str:

LOGGER.debug('pretty-printing XML')
val = minidom.parseString(xml.decode(encoding))
return '\n'.join([val for val in val.toprettyxml(indent=' '*2).split('\n') if val.strip()]) # noqa
return '\n'.join([val for val in val.toprettyxml(indent=' '*2).split('\n') if val.strip()]) # noqa

def escape_json(value):
    """
    Serialise a value to JSON text and backslash-escape that text.

    The value is first serialised with ``json.dumps``; in the resulting text
    every backslash is doubled and every double quote is prefixed with a
    backslash. For string input this includes the quotes ``json.dumps``
    wraps around the value.

    NOTE(review): because ``json.dumps`` has already escaped the value, the
    result is escaped twice -- confirm this is what the templates expect.

    :param value: The value to escape.
    :returns: The escaped JSON text as a string.
    """
    # One pass is equivalent to the two chained replacements: neither
    # replacement produces a character the other one would rewrite again.
    escape_table = str.maketrans({'\\': '\\\\', '"': '\\"'})
    serialized = json.dumps(value)
    return serialized.translate(escape_table)
16 changes: 11 additions & 5 deletions ckan2pycsw/schemas/ckan/iso19139_geodcatap/main.j2
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@
"language": "{{ language }}",
"languagelabel": "{{ language|get_mapping_value_from_yaml_list(input_field="value", output_field='label', codelist="language",mappings_folder=mappings_folder + "/ckan_geodcatap")}}",
"charset": "utf8",
"title": "{{ record['title'] }}",
"abstract": "{{ record['notes'] }}",
"title": "{{ record['title']|safe }}",
"abstract": "{{ record['notes']|safe }}",
{% if record['graphic_overview'] %}
"browsegraphic": "{{ record['graphic_overview'] }}",
{% endif %}
Expand Down Expand Up @@ -273,6 +273,12 @@
"uri": "{{ record['access_rights'] }}"
},
{% endif %}

{% else %}
"accessconstraints": {
"label": "There are no limitations on public access to spatial data sets and services.",
"uri": "http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations"
},
{% endif %}
"url": "{{ url }}",
"status": "UnderDevelopment",
Expand Down Expand Up @@ -360,7 +366,7 @@
"processstep": [
{% for step in record['lineage_process_steps'] %}
{
"description": "{{ step }}"
"description": "{{ step|safe }}"
}{% if not loop.last %},{% endif %}
{% endfor %}
],
Expand All @@ -369,13 +375,13 @@
"source": [
{% for source in record['lineage_source'] %}
{
"description": "{{ source }}"
"description": "{{ source|safe }}"
}{% if not loop.last %},{% endif %}
{% endfor %}
],
{% endif %}
{% if record['lineage_statement'] %}
"statement": "{{ record['provenance'] }}"
"statement": "{{ record['provenance']|safe }}"
{% else %}
"statement": "No lineage statement provided"
{% endif %}
Expand Down

0 comments on commit 2a5ca63

Please sign in to comment.