
Merge pull request #11 from GSA/develop
Merging for 1.25 release
Yatin Khadilkar committed Jan 16, 2015
2 parents 3ee1bd2 + b6154f2 commit 7a8d6d0
Showing 2 changed files with 40 additions and 49 deletions.
52 changes: 15 additions & 37 deletions ckanext/datajson/datajsonvalidator.py
@@ -1,10 +1,5 @@
 import re

-# from the iso8601 package, plus ^ and $ on the edges
-ISO8601_REGEX = re.compile(r"^([0-9]{4})(-([0-9]{1,2})(-([0-9]{1,2})"
-                           r"((.)([0-9]{2}):([0-9]{2})(:([0-9]{2})(\.([0-9]+))?)?"
-                           r"(Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?$")
-
 TEMPORAL_REGEX_1 = re.compile(
     r'^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?'
     r'|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]'
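Note: the removed ISO8601_REGEX (its only caller, check_date_field, is deleted further down in this diff) accepted progressively truncated ISO 8601 values. A minimal sketch of its behavior, assuming the pattern exactly as shown above:

    import re

    ISO8601_REGEX = re.compile(r"^([0-9]{4})(-([0-9]{1,2})(-([0-9]{1,2})"
                               r"((.)([0-9]{2}):([0-9]{2})(:([0-9]{2})(\.([0-9]+))?)?"
                               r"(Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?$")

    assert ISO8601_REGEX.match("2015-01-16T10:30:00Z")  # full date-time
    assert ISO8601_REGEX.match("2015-01-16")            # date only
    assert ISO8601_REGEX.match("2015")                  # a bare year also passes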
@@ -107,11 +102,11 @@ def do_validation(doc, errors_array):
dataset_name = "dataset %d" % (i + 1)

# title
if check_string_field(item, "title", 1, dataset_name, errs):
if check_required_string_field(item, "title", 1, dataset_name, errs):
dataset_name = '"%s"' % item.get("title", "").strip()

# accessLevel # required
if check_string_field(item, "accessLevel", 3, dataset_name, errs):
if check_required_string_field(item, "accessLevel", 3, dataset_name, errs):
if item["accessLevel"] not in ("public", "restricted public", "non-public"):
add_error(errs, 5, "Invalid Required Field Value",
"The field 'accessLevel' had an invalid value: \"%s\"" % item["accessLevel"],
@@ -137,10 +132,10 @@ def do_validation(doc, errors_array):
if check_required_field(item, "contactPoint", dict, dataset_name, errs):
cp = item["contactPoint"]
# contactPoint - fn # required
check_string_field(cp, "fn", 1, dataset_name, errs)
check_required_string_field(cp, "fn", 1, dataset_name, errs)

# contactPoint - hasEmail # required
if check_string_field(cp, "hasEmail", 9, dataset_name, errs):
if check_required_string_field(cp, "hasEmail", 9, dataset_name, errs):
import lepl.apps.rfc3696

email_validator = lepl.apps.rfc3696.Email()
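Note: the hasEmail check delegates to the lepl library's RFC 3696 validator imported above. A sketch of the validator's behavior, assuming lepl is installed and using its documented Email() factory, which returns a callable that reports whether a string is a well-formed address:

    import lepl.apps.rfc3696

    email_validator = lepl.apps.rfc3696.Email()
    assert email_validator("data@gsa.gov")      # well-formed address
    assert not email_validator("not an email")  # rejected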
@@ -151,10 +146,10 @@ def do_validation(doc, errors_array):
                               dataset_name)

         # description # required
-        check_string_field(item, "description", 1, dataset_name, errs)
+        check_required_string_field(item, "description", 1, dataset_name, errs)

         # identifier #required
-        if check_string_field(item, "identifier", 1, dataset_name, errs):
+        if check_required_string_field(item, "identifier", 1, dataset_name, errs):
             if item["identifier"] in seen_identifiers:
                 add_error(errs, 5, "Invalid Required Field Value",
                           "The dataset identifier \"%s\" is used more than once." % item["identifier"],
@@ -175,7 +170,7 @@ def do_validation(doc, errors_array):
"A keyword in the keyword array was an empty string.", dataset_name)

# modified # required
if check_string_field(item, "modified", 1, dataset_name, errs):
if check_required_string_field(item, "modified", 1, dataset_name, errs):
if not MODIFIED_REGEX_1.match(item['modified']) \
and not MODIFIED_REGEX_2.match(item['modified']) \
and not MODIFIED_REGEX_3.match(item['modified']):
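Note: MODIFIED_REGEX_1 through MODIFIED_REGEX_3 are defined above this hunk; per the Project Open Data schema, 'modified' is expected to accept an ISO 8601 date/date-time or a repeating duration such as R/P1D. A sketch with simplified stand-in patterns (the real regexes are stricter):

    import re

    # Hypothetical stand-ins for MODIFIED_REGEX_1..3:
    iso_date = re.compile(r'^\d{4}-\d{2}-\d{2}$')
    repeating = re.compile(r'^R/P\d+[YMWD]$')

    for value in ("2015-01-16", "R/P1D"):
        assert iso_date.match(value) or repeating.match(value)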
@@ -195,7 +190,7 @@ def do_validation(doc, errors_array):
         # publisher # required
         if check_required_field(item, "publisher", dict, dataset_name, errs):
             # publisher - name # required
-            check_string_field(item["publisher"], "name", 1, dataset_name, errs)
+            check_required_string_field(item["publisher"], "name", 1, dataset_name, errs)

         # Required-If-Applicable

@@ -222,7 +217,7 @@ def do_validation(doc, errors_array):

                 # distribution - mediaType # Required-If-Applicable
                 if 'downloadURL' in dt:
-                    if check_string_field(dt, "mediaType", 1, distribution_name, errs):
+                    if check_required_string_field(dt, "mediaType", 1, distribution_name, errs):
                         if not IANA_MIME_REGEX.match(dt["mediaType"]):
                             add_error(errs, 5, "Invalid Field Value",
                                       "The distribution mediaType \"%s\" is invalid. "
@@ -249,23 +244,23 @@ def do_validation(doc, errors_array):

 
                 # distribution - description # optional
                 if dt.get("description") is not None:
-                    check_string_field(dt, "description", 1, distribution_name, errs)
+                    check_required_string_field(dt, "description", 1, distribution_name, errs)

                 # distribution - format # optional
                 if dt.get("format") is not None:
-                    check_string_field(dt, "format", 1, distribution_name, errs)
+                    check_required_string_field(dt, "format", 1, distribution_name, errs)

                 # distribution - title # optional
                 if dt.get("title") is not None:
-                    check_string_field(dt, "title", 1, distribution_name, errs)
+                    check_required_string_field(dt, "title", 1, distribution_name, errs)

         # license # Required-If-Applicable
         check_url_field(False, item, "license", dataset_name, errs)

         # rights # Required-If-Applicable
         # TODO move to warnings
         # if item.get("accessLevel") != "public":
-        # check_string_field(item, "rights", 1, dataset_name, errs)
+        # check_string_field(item, "rights", 1, dataset_name, errs)

         # spatial # Required-If-Applicable
         # TODO: There are more requirements than it be a string.
@@ -312,7 +307,7 @@ def do_validation(doc, errors_array):

         # isPartOf # optional
         if item.get("isPartOf"):
-            check_string_field(item, "isPartOf", 1, dataset_name, errs)
+            check_required_string_field(item, "isPartOf", 1, dataset_name, errs)

         # issued # optional
         if item.get("issued") is not None:
@@ -415,7 +410,7 @@ def check_required_field(obj, field_name, data_type, dataset_name, errs):
     return True


-def check_string_field(obj, field_name, min_length, dataset_name, errs):
+def check_required_string_field(obj, field_name, min_length, dataset_name, errs):
     # checks that a required field exists, is typed as a string, and has a minimum length
     if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs):
         return False
@@ -431,23 +426,6 @@ def check_string_field(obj, field_name, min_length, dataset_name, errs):
     return True


-def check_date_field(obj, field_name, dataset_name, errs):
-    # checks that a required date field exists and looks like a date
-    if not check_required_field(obj, field_name, (str, unicode), dataset_name, errs):
-        return False
-    elif len(obj[field_name].strip()) == 0:
-        add_error(errs, 10, "Missing Required Fields", "The '%s' field is present but empty." % field_name,
-                  dataset_name)
-        return False
-    else:
-        if not ISO8601_REGEX.match(obj[field_name]):
-            add_error(errs, 5, "Invalid Required Field Value",
-                      "The '%s' field has an invalid ISO 8601 date or date-time value: \"%s\"." % (
-                          field_name, obj[field_name]), dataset_name)
-            return False
-    return True
-
-
 def check_url_field(required, obj, field_name, dataset_name, errs):
     # checks that a required or optional field, if specified, looks like a URL
     if not required and (field_name not in obj or obj[field_name] is None): return True  # not required, so OK
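Note: the bulk of this file's diff is the rename of check_string_field to check_required_string_field (behavior unchanged), plus the removal of the now-unused check_date_field and its ISO8601_REGEX. A sketch of the helper's calling convention, assuming the definition above and an errs accumulator whose exact shape is set up elsewhere in the module:

    errs = {}
    item = {"title": ""}
    if check_required_string_field(item, "title", 1, "dataset 1", errs):
        title = item["title"].strip()  # safe: present, a string, and long enough
    else:
        pass  # a missing/empty-field error was recorded in errs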
37 changes: 25 additions & 12 deletions ckanext/datajson/plugin.py
@@ -217,15 +217,16 @@ def make_edi(owner_org):
     logger.addHandler(eh)

     # Build the data.json file.
-    packages = get_all_group_packages(group_id=owner_org)
+    packages = get_packages(owner_org)

     output = []
     for pkg in packages:
-        if pkg['owner_org'] == owner_org:
-            datajson_entry = make_datajson_entry(pkg)
-            if datajson_entry and is_valid(datajson_entry):
-                output.append(datajson_entry)
-            else:
-                logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None))
+        #if pkg['owner_org'] == owner_org:
+        datajson_entry = make_datajson_entry(pkg)
+        if datajson_entry and is_valid(datajson_entry):
+            output.append(datajson_entry)
+        else:
+            logger.warn("Dataset id=[%s], title=[%s] omitted\n", pkg.get('id', None), pkg.get('title', None))

     # Get the error log
     eh.flush()
@@ -247,18 +248,15 @@ def make_pdl(owner_org):
     eh.setFormatter(formatter)
     logger.addHandler(eh)

-
     # Build the data.json file.
-    packages = get_all_group_packages(group_id=owner_org)
+    packages = get_packages(owner_org)

     output = []
     #Create data.json only using public datasets, datasets marked non-public are not exposed
     for pkg in packages:
         extras = dict([(x['key'], x['value']) for x in pkg['extras']])
         try:
-            if pkg['owner_org'] == owner_org \
-                    and not (re.match(r'[Nn]on-public', extras['public_access_level'])):
-
+            if not (re.match(r'[Nn]on-public', extras['public_access_level'])):
                 datajson_entry = make_datajson_entry(pkg)
                 if datajson_entry and is_valid(datajson_entry):
                     output.append(datajson_entry)
@@ -280,6 +278,21 @@ def make_pdl(owner_org):
     #return json.dumps(output)
     return write_zip(output, error, zip_name='pdl')
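Note: re.match anchors at the start of the string, so the filter above drops a dataset only when its public_access_level extra begins with 'non-public' or 'Non-public':

    import re

    assert re.match(r'[Nn]on-public', 'non-public')             # excluded from the PDL
    assert not re.match(r'[Nn]on-public', 'public')             # included
    assert not re.match(r'[Nn]on-public', 'restricted public')  # included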

+def get_packages(owner_org):
+    # Build the data.json file.
+    packages = get_all_group_packages(group_id=owner_org)
+    #get packages for sub-agencies.
+    sub_agency = model.Group.get(owner_org)
+    if 'sub-agencies' in sub_agency.extras.col.target and \
+            sub_agency.extras.col.target['sub-agencies'].state == 'active':
+        sub_agencies = sub_agency.extras.col.target['sub-agencies'].value
+        sub_agencies_list = sub_agencies.split(",")
+        for sub in sub_agencies_list:
+            sub_packages = get_all_group_packages(group_id=sub)
+            for sub_package in sub_packages:
+                packages.append(sub_package)
+
+    return packages
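Note: the new get_packages helper widens the harvest to sub-agencies. It assumes the parent group carries an active 'sub-agencies' extra whose value is a comma-separated list of child group ids, e.g.:

    # Hypothetical group extra on the parent organization:
    #   key:   'sub-agencies'
    #   value: 'gsa-sub-one,gsa-sub-two'
    #   state: 'active'
    packages = get_packages('gsa-gov')  # parent packages plus both sub-agencies' packages

This is presumably also why the per-package owner_org filter in make_edi was commented out above: packages harvested from sub-agency groups would otherwise be dropped.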

 def get_all_group_packages(group_id):
     """
