From 1232a39bc252c5cce82d1603975a593a6c3a219d Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Wed, 20 Sep 2023 12:56:46 +1000 Subject: [PATCH 01/14] First pass at updating the validator --- lib/checker.js | 67 ++++++---------- package-lock.json | 4 +- test/checker.spec.js | 101 ++++++++---------------- test/rocrate.new.spec.js | 24 +++++- test/rocrate.spec.js | 5 +- test_data/sample-ro-crate-metadata.json | 2 +- 6 files changed, 90 insertions(+), 113 deletions(-) diff --git a/lib/checker.js b/lib/checker.js index 0af91ea..ea10f59 100644 --- a/lib/checker.js +++ b/lib/checker.js @@ -98,7 +98,7 @@ class Checker { const root = this.crate.rootDataset; return new CheckItem({ name: 'Root dataset has appropriate @id', - message: `The root dataset @id ends in "/"`, + message: `The root dataset is './' `, status: !!(root && root['@id'].endsWith('/')) }); } @@ -112,63 +112,45 @@ class Checker { }); } - hasDescription() { - const root = this.crate.rootDataset; - return new CheckItem({ - name: 'Has description', - message: 'The root Dataset has a description (http://schema.org/description)', - status: !!(root && root.description && root.description.length > 0) - }); - } - hasAuthor() { + hasLicense() { const root = this.crate.rootDataset; - const authors = Utils.asArray(root?.author).map(a => this.crate.getEntity(a['@id'])); - return new CheckItem({ - name: 'Has valid Authors', - message: 'The root Dataset has at least one Author (http://schema.org/author) referred to by @id, and all authors have @type Person (http://schema.org/Person) or Organization (http://schema.org/Organization)', - status: (authors.length > 0) && authors.every(a => includesTextCI(a?.['@type'], ['Person', 'Organization'])) - }); + const licenses = Utils.asArray(root?.license); + const check = new CheckItem({ + name: 'Has a license', + type: "ERROR", + message: "The Root dataset has a license property", + status: !!(licenses.length > 0 || false) + }); + + return check; + } - hasLicense() { + hasDescription() { const root = this.crate.rootDataset; - const licenses = Utils.asArray(root?.license).map(l => this.crate.getEntity(l['@id'])); return new CheckItem({ - name: 'Has a license ', - message: 'The root Dataset has a License' + - licenses.map(license => license && license.name && license.description && - includesTextCI(license['@type'], 'CreativeWork') ? - ' (the license is a Creative Work with a name and description as it SHOULD be)' : '' - ).join(''), - status: (licenses.length > 0) + name: 'Has description', + message: 'The root Dataset has a description (http://schema.org/description)', + status: !!(root && root.description && root.description.length > 0), + type: "ERROR" }); } + hasDatePublished() { const root = this.crate.rootDataset; var date = Utils.asArray(root?.datePublished); return new CheckItem({ name: 'Has a datePublished ', - message: 'The root Dataset has a datePublished with ONE value which is an ISO 8601 format precision of at least a day', + message: 'The root Dataset has a datePublished with ONE value which is an ISO 8601 format', diagnostics: date.length === 1 ? '' : `Number of datePublished values is ${date.length} NOT 1`, - status: !!(date.length === 1 && date[0]?.match(/^\d{4}-([0]\d|1[0-2])-([0-2]\d|3[01])/)) + status: !!(date.length === 1 && date[0].match(/^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/)), + type: "ERROR" }); } - hasContactPoint() { - const root = this.crate.rootDataset; - var contacts = Utils.asArray(root?.contactPoint).map(c => this.crate.getEntity(c['@id'])); - return new CheckItem({ - name: 'Has a contactPoint', - message: 'The root Dataset has at least one contactPoint property which references a ContactPoint of type Customer Service', - status: contacts.some(contact => contact && contact.email && - Utils.asArray(contact['@type']).includes('ContactPoint') && - Utils.asArray(contact.contactType).includes('customer service')) - }); - } - async check() { var checkNames = methods.filter(n => !(n in { hasContext: 0, hasAuthor: 0, hasContactPoint: 0 })); var context = await this.hasContext(); @@ -189,8 +171,8 @@ class Checker { report() { var report = []; for (var item of this.checklist) { - const tick = item.status ? '✔️' : '❌'; - report.push(`${tick} ${item.name}: ${item.message}`); + const tick = item.status ? '✔️ OK' : `❌ ${item.type}`; + report.push(`${tick} ${item.name}: ${item.message} ${item.diagnostics}`); } return report.join('\n'); } @@ -210,8 +192,9 @@ class CheckItem { constructor(data) { this.name = data.name; this.message = data.message; + this.type = data?.type || "WARNING" this.status = data.status ?? false; - if (data.diagnostics) this.diagnostics = data.diagnostics; + this.diagnostics = data.diagnostics || ""; } } diff --git a/package-lock.json b/package-lock.json index 6aeb854..4b6ed20 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "ro-crate", - "version": "3.3.1", + "version": "3.3.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "ro-crate", - "version": "3.3.1", + "version": "3.3.2", "license": "GPL-3.0-or-later", "dependencies": { "commander": "^4.0.1", diff --git a/test/checker.spec.js b/test/checker.spec.js index 0046c16..9ba8252 100644 --- a/test/checker.spec.js +++ b/test/checker.spec.js @@ -13,6 +13,23 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . +*/ + + + +/* +The Root Data Entity MUST have the following properties: + +@type: MUST be [Dataset] or an array that contain Dataset +@id: SHOULD be the string ./ or an absolute URI (see below) +name: SHOULD identify the dataset to humans well enough to disambiguate it from other RO-Crates +description: SHOULD further elaborate on the name to provide a summary of the context in which the dataset is important. +datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond. +license: SHOULD link to a Contextual Entity or Data Entity in the RO-Crate Metadata Document with a name and description (see section on licensing). MAY, if necessary be a textual description of how the RO-Crate may be used. + + + + */ const assert = require("assert"); @@ -44,79 +61,52 @@ describe("Incremental checking", async function () { //var dataset = crate.getRootDataset(); var dataset = json["@graph"][0]; dataset.name = ""; + + var checker = new Checker(new ROCrate(json)); assert(!checker.hasName().status, "Does not have a name"); dataset.name = "Name!"; var checker = new Checker(new ROCrate(json)); assert(checker.hasName().status, "Does have a name"); - assert(!checker.hasAuthor().status, "Does not have author"); - // Author - var author1 = { - "@id": "http://orcid.org/some-orcid", - name: "Some Person", - }; - dataset.author = [{ "@id": "http://orcid.org/some-orcid" }]; - json["@graph"].push(author1); - var checker = new Checker(new ROCrate(json)); - assert( - !checker.hasAuthor().status, - "Does not have one or more authors with @type Person or Organization" - ); - // One good author and one dodgy one - var author2 = { - "@id": "http://orcid.org/some-other-orcid", - name: "Some Person", - "@type": "Person", - }; - dataset.author = [{ "@id": "http://orcid.org/some-orcid" }, { "@id": "http://orcid.org/some-other-orcid" }]; - json["@graph"].push(author1, author2); var checker = new Checker(new ROCrate(json)); - assert( - !checker.hasAuthor().status, - "Does not have one or more authors with @type Person or Organization" - ); + assert(!checker.hasDescription().status, "Does not have a description"); + dataset.description = "Description!"; - // One good author - dataset.author = [author2]; - json["@graph"] = [ - defaults.metadataFileDescriptorTemplate, - dataset, - author2, - ]; var checker = new Checker(new ROCrate(json)); - assert( - checker.hasAuthor().status, - "Does have a author with @type Person or Organization" - ); + assert(checker.hasName().status, "Does have a description"); // License // No name, description + console.log(checker.hasLicense()); assert( !checker.hasLicense().status, - "Does not have a license with @type CreativeWork" + "Has a license" ); + var license = { "@id": "http://example.com/some_kind_of_license", "@type": "CreativeWork", URL: "http://example.com/some_kind_of_license", }; dataset.license = { "@id": license["@id"] }; + json["@graph"].push(license); crate = new ROCrate(json); var checker = new Checker(crate); assert( checker.hasLicense().status, - "Has a license with @type CreativeWork" + "Has a license" ); + license.name = "Some license"; license.description = "Description of at least 20 characters."; assert( checker.hasLicense().status, - "Does have a license with @type CreativeWork and a name and description" + "Has a license" ); // datePublished @@ -124,13 +114,14 @@ describe("Incremental checking", async function () { !checker.hasDatePublished().status, "Does not have a datePublished" ); + + crate.rootDataset.datePublished = "2017"; // Not enough detail! assert( - !checker.hasDatePublished().status, - "Does not have a datePublished (not enough detail)" + checker.hasDatePublished().status, ); - crate.rootDataset.datePublished = ["2017-07-21", "2019-08-09"]; // this should do it + crate.rootDataset.datePublished = ["2017-07-21", "2019-08-09"]; assert( !checker.hasDatePublished().status, "Does not have a single datePublished" @@ -139,32 +130,10 @@ describe("Incremental checking", async function () { crate.rootDataset.datePublished = ["2017-07-21"]; // this should do it assert(checker.hasDatePublished().status, "Does have a datePublished"); - //contactPoint missing - assert( - !checker.hasContactPoint().status, - "Does not have a single contact point" - ); - var contact = { - "@id": "some.email@example.com", - "@type": "ContactPoint", - }; // Not enough - dataset.contactPoint = [{ "@id": "some.email@example.com" }]; - json["@graph"].push(contact); - var checker = new Checker(new ROCrate(json)); - assert( - !checker.hasContactPoint().status, - "Does not have a contact point with enough properties" - ); - contact.contactType = "customer service"; - contact.email = "some@email"; // TODO: Not validated! - var checker = new Checker(new ROCrate(json)); - assert( - checker.hasContactPoint().status, - "Does have a proper contact point" - ); + await checker.check(); - //console.log(checker.report()); + console.log(checker.report()); }); }); diff --git a/test/rocrate.new.spec.js b/test/rocrate.new.spec.js index ecdb4e0..5ca3e78 100644 --- a/test/rocrate.new.spec.js +++ b/test/rocrate.new.spec.js @@ -497,6 +497,20 @@ describe("setProperty", function () { assert.strictEqual(r.license.length, 2); }); + + it("Does not kill existing entities", function() { + let crate = new ROCrate(testData, { link: true, replace: true }); + let e = crate.getEntity('https://orcid.org/0000'); + assert.ok(e); + assert.strictEqual(e.contactPoint.email, "john.doe@uq.edu.au"); + crate.rootDataset.author = {'@id': 'https://orcid.org/0000'} + let auth = crate.getEntity('https://orcid.org/0000'); + assert.strictEqual(auth.contactPoint.email, "john.doe@uq.edu.au"); + + + }); + + it("can replace existing entities", function() { let crate = new ROCrate(testData, { link: true, replace: true }); let e = crate.getEntity('https://orcid.org/0000'); @@ -505,7 +519,7 @@ describe("setProperty", function () { // ref only, don't replace crate.rootDataset.author = {'@id': 'https://orcid.org/0000'} let auth = crate.getEntity('https://orcid.org/0000'); - assert.strictEqual(auth.name, "John Doe"); + assert.strictEqual(auth.name, "John Doe"); assert.strictEqual(auth.contactPoint.email, "john.doe@uq.edu.au"); // replace here crate.rootDataset.author = { @@ -517,6 +531,14 @@ describe("setProperty", function () { assert.ok(!auth.contactPoint); assert.strictEqual(auth.name, "Jane Doe"); + + + crate.rootDataset.author = {'@id': 'https://orcid.org/0000'} + let auth1 = crate.getEntity('https://orcid.org/0000'); + assert.strictEqual(auth1.name, "John Doe"); + assert.strictEqual(auth1.contactPoint.email, "john.doe@uq.edu.au"); + + }); }); diff --git a/test/rocrate.spec.js b/test/rocrate.spec.js index 7b46eb6..25edce0 100644 --- a/test/rocrate.spec.js +++ b/test/rocrate.spec.js @@ -15,6 +15,9 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ + + + const fs = require("fs"); const assert = require("assert"); const expect = require("chai").expect; @@ -412,7 +415,7 @@ describe("IDs and identifiers", function () { //console.log(newItem.name) assert(Array.isArray(newItem.name)); - //consol.og(crate.flatify(newItem, 2)); + console.log(crate.flatify(newItem, 2)); //console.log(crate.objectified); }); diff --git a/test_data/sample-ro-crate-metadata.json b/test_data/sample-ro-crate-metadata.json index a064c0b..d13e057 100644 --- a/test_data/sample-ro-crate-metadata.json +++ b/test_data/sample-ro-crate-metadata.json @@ -4000,4 +4000,4 @@ "name": "pics/thumbs/sepia_fence.png" } ] -} \ No newline at end of file +} From d5fed653a5bfe380d81e1dee34a6048e03061181 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 21 Sep 2023 15:57:25 +1000 Subject: [PATCH 02/14] Adding new files --- lib/validator.js | 227 +++++++++++++++++++++++++++++++++++++++++ test/validator.spec.js | 172 +++++++++++++++++++++++++++++++ 2 files changed, 399 insertions(+) create mode 100644 lib/validator.js create mode 100644 test/validator.spec.js diff --git a/lib/validator.js b/lib/validator.js new file mode 100644 index 0000000..5330c69 --- /dev/null +++ b/lib/validator.js @@ -0,0 +1,227 @@ +/* + +This is part of ro-crate-html-js a tool for generating HTMl +previews of HTML files. + +Copyright (C) 2021 University of Technology Sydney +Copyright (C) 2022 Queensland University + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +/** + * @typedef {import('./rocrate').ROCrate} ROCrate + */ + +const defaults = require('./defaults'); +const { Utils } = require('./utils'); +const { ROCrate } = require('./rocrate'); +require('cross-fetch/polyfill'); + +// errors, warnings, info + +function isValidUrl(urlStr) { + try { + let url = new URL(urlStr); + return true; + } catch (e) {} + return false; +} + +class Validator { + /** + * + * @param {ROCrate} crate + */ + constructor(json) { + this.result = { + errors: [], + warnings: [], + info: [], + }; + this.json = null; + this.crate = null; + this.isJSON(json); + this.isCrate(); + } + + isJSON(json) { + if (typeof json === 'object') { + this.json = json; + return; + } + try { + this.json = JSON.parse(json); + } catch (error) { + console.log(JSON); + this.result.errors.push({ + message: 'Crate is not JSON: ' + error, + }); + } + } + + isCrate() { + if (!this.json) return; + + try { + this.crate = new ROCrate(this.json); + console.log('crate', this.crate, 'json', this.json); + } catch (error) { + console.log('HAVE AN ERROR', error); + this.result.errors.push({ + message: 'ROCrate-js can not parse this JSON: ' + String(error), + }); + } + } + async hasContext() { + if (!this.json || !this.crate) return; + + // See if there is a URL in the context which has an appropriate name + var foundContext = false; + for (let contextUrl of Utils.asArray(this.crate['@context'])) { + if (typeof contextUrl === 'string' || contextUrl instanceof String) { + try { + const response = await fetch(/**@type {string}*/ (contextUrl), { + headers: { + accept: 'application/ld+json, application/ld+json, text/text', + }, + }); + if (response.ok) { + const content = await response.json(); + if ( + Utils.asArray(content.name).includes('RO-Crate JSON-LD Context') + ) { + this.result.info.push({ + message: `Has a context ${contextUrl} named "RO-Crate JSON-LD Context", version ${content.version}`, + }); + foundContext = true; + break; + } + } else { + throw new Error(response.statusText); + } + } catch (error) { + console.error(error); + this.result.warnings.push({ + message: `There was an issue fetching this context: ${contextUrl} ${error}`, + }); + break; + } + } + } + if (!foundContext) { + this.result.warnings.push({ + message: "There is no reference to an 'official' RO-Crate @context", + }); + } + } + + rootDataEntity() { + /* + The Root Data Entity MUST have the following properties: ["@type", "@id", "name", "description", "datePublished", "license"] + + @type: MUST be [Dataset] or an array that contain Dataset + @id: SHOULD be the string ./ or an absolute URI + */ + + if (!this.json || !this.crate) return; + + const root = this.crate.rootDataset; + if (!root) { + this.result.errors.push({ + entity: '', + property: '', + message: 'There is no Root Data Entity', + clause: '', + }); + } else { + // Check ID is up to scratch -- warn if not + if (!(root?.['@id'] === './') || isValidUrl(root?.['@id'])) { + this.result.warnings.push({ + entity: root?.['@id'], + message: `Root Data Entity has appropriate @id. Is: ${root?.['@id']}`, + clause: `@id: SHOULD be the string ./ or an absolute URI `, + }); + } + // Check type is there -- error if not + if ( + !( + root?.['@type'].includes('Dataset') && + this.crate.resolveTerm('Dataset') === 'http://schema.org/Dataset' + ) + ) { + this.result.errors.push({ + entity: root['@id'], + message: 'Root dataset does not have Dataset as one of its types', + clause: `@type: MUST be [Dataset] or an array that contain Dataset`, + }); + } + // Check all the props are there - error if not + for (let prop of [ + '@type', + '@id', + 'name', + 'description', + 'datePublished', + 'license', + ]) { + if ( + !root?.[prop] || + (!['@type', '@id'].includes(prop) && + this.crate.resolveTerm(prop) != `http://schema.org/${prop}`) + ) { + this.result.errors.push({ + entity: root['@id'], + message: `Missing required property: ${prop}`, + clause: `The Root Data Entity MUST have the following properties: ["@type", "@id", "name", "description", "datePublished", "license"]`, + }); + } + } + // Check the date + if ( + root?.datePublished && + root.datePublished.length > 1 + ) { + this.result.errors.push({ + entity: root['@id'], + message: `datePublished must be a string, but multiple values have been supplied`, + clause: `datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond.`, + }); + } + } + + if ( + root?.datePublished && + (!root.datePublished.match( + /^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/ + )) + ) { + this.result.errors.push({ + entity: root['@id'], + message: `datePublished does not start with a compliant date in this format: YYYY, YYYY-MM or YYYY-MM-DD`, + clause: `datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond.`, + }); + } +} + + + + async validate() { + await this.hasContext(); + this.rootDataEntity(); + + } +} + +module.exports = { Validator }; diff --git a/test/validator.spec.js b/test/validator.spec.js new file mode 100644 index 0000000..4a0c100 --- /dev/null +++ b/test/validator.spec.js @@ -0,0 +1,172 @@ +/* This is part of Calcyte a tool for implementing the DataCrate data packaging +spec. Copyright (C) 2018 University of Technology Sydney + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + + + +/* +The Root Data Entity MUST have the following properties: ["@type", "@id", "name", "description", "datePublished", "license"] + +@type: MUST be [Dataset] or an array that contain Dataset +@id: SHOULD be the string ./ or an absolute URI (see below) +name: SHOULD identify the dataset to humans well enough to disambiguate it from other RO-Crates +description: SHOULD further elaborate on the name to provide a summary of the context in which the dataset is important. +datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond. +license: SHOULD link to a Contextual Entity or Data Entity in the RO-Crate Metadata Document with a name and description (see section on licensing). MAY, if necessary be a textual description of how the RO-Crate may be used. + + + + +*/ + + +const assert = require("assert"); +const {Validator} = require("../lib/validator"); +const chai = require("chai"); +chai.use(require("chai-fs")); +const defaults = require("../lib/defaults"); +const {ROCrate} = require("../lib/rocrate"); + +function hasClause(results, rule, id) { + if (id) { + results.some((r) => r.clause === rule.clause && rule.entity === id); + } + return results.some((r) => r.clause === rule.clause); +} + +function hasMessage(results, message, id) { + if (id) { + return results.some((r) => r.message === message && r.entity === id); + } + return results.some((r) => r.message === message); +} + + + +describe("Incremental checking", async function () { + it("should trigger all the right reporting", async function () { + var validator = new Validator("THIS IS NOT JSON IT IS A STRING"); + assert(validator.result.errors[0].message === `Crate is not JSON: SyntaxError: Unexpected token 'T', "THIS IS NO"... is not valid JSON`) + assert(validator.crate === null) + + var validator = new Validator(JSON.stringify({"Something" : ["THIS IS JSON but RO-Crate will not like it one bit"]})); + // TODO -- Actually - RO Crate does not care -- need to add some more validation :) + + this.timeout(10000); + var crate = new ROCrate(); + var json = crate.toJSON(); // should be a minimal viable datacrate + json["@context"] = []; + var validator = new Validator(json); + await validator.hasContext(); + console.log(validator.result) + assert(validator.result.errors.length === 0) + assert(validator.result.warnings[0].message === "There is no reference to an 'official' RO-Crate @context"); + + + // Now with context + json["@context"] = defaults.context; + var validator = new Validator(json); + + discoverInParentDirExactMatch; + + + // Don't have a dataset tho yet + + + // Check that the RootDatset exists + var crate = new ROCrate(); + crate.rootDataset["@id"] = "Nothing special"; + var validator = new Validator(crate.toJSON()); + validator.rootDataEntity() + + assert(hasMessage(validator.result.warnings, "Root Data Entity has appropriate @id. Is: Nothing special", "Nothing special")) + + + // Check that the Root Data Entity has the right @type + var crate = new ROCrate(); + crate.rootDataset["@type"] = ["Nothing", "Special"]; + var validator = new Validator(crate.toJSON()); + validator.rootDataEntity(); + assert(validator.result.errors[0].clause === "@type: MUST be [Dataset] or an array that contain Dataset"); + + // Check that the Root Data Entity has the right Type -- change the context so it doesn't + var crate = new ROCrate(); + crate.addContext({"Dataset": "some:dodgy-definiton-of-dataset"}); + var validator = new Validator(crate.toJSON()); + validator.rootDataEntity(); + assert(validator.result.errors[0].clause === "@type: MUST be [Dataset] or an array that contain Dataset"); + + + // Check required props on Root Data Entity + var crate = new ROCrate(); + var validator = new Validator(crate.toJSON()); + validator.rootDataEntity(); + assert(validator.result.errors.length === 4); + assert(hasMessage(validator.result.errors, "Missing required property: license")) + assert(hasMessage(validator.result.errors, "Missing required property: name")) + assert(hasMessage(validator.result.errors, "Missing required property: description")) + assert(hasMessage(validator.result.errors, "Missing required property: datePublished")) + + + // Check required props on Root Data Entity are properly defined -- and if the context is wrong then they are not + var crate = new ROCrate(); + var validator = new Validator(crate.toJSON()); + + crate.addContext({ + "name": "some:dodgy-definiton-of-name", + "license": "some:dodgy-definiton-of-license", + "description": "some:dodgy-definiton-of-description", + "datePublished": "some:dodgy-definiton-of-name" + + }); + crate.rootDataset.name = "name"; + crate.rootDataset.description = "description"; + crate.rootDataset.license = "license"; + crate.rootDataset.datePublished = "1983"; + validator.rootDataEntity(); + + assert(validator.result.errors.length === 4); + assert(hasMessage(validator.result.errors, "Missing required property: license")) + assert(hasMessage(validator.result.errors, "Missing required property: name")) + assert(hasMessage(validator.result.errors, "Missing required property: description")) + assert(hasMessage(validator.result.errors, "Missing required property: datePublished")) + + + // Check required props on Root Data Entity are properly defined + var crate = new ROCrate(); + crate.rootDataset.name = "name"; + crate.rootDataset.description = "description"; + crate.rootDataset.license = "bad license"; + crate.rootDataset.datePublished = "1983"; + validator.rootDataEntity(); + var validator = new Validator(crate.toJSON()); + + assert(validator.result.errors.length === 0); + + + + + + + + + + }); +}); + +after(function () { + //TODO: destroy test repoPath +}); From b6e4cbe6e4f6cba161e10893c0f851c136c6031a Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 21 Sep 2023 16:06:59 +1000 Subject: [PATCH 03/14] Put back old checker code --- lib/checker.js | 67 +++++++++++++++++----------- test/checker.spec.js | 101 ++++++++++++++++++++++++++++--------------- 2 files changed, 108 insertions(+), 60 deletions(-) diff --git a/lib/checker.js b/lib/checker.js index ea10f59..0af91ea 100644 --- a/lib/checker.js +++ b/lib/checker.js @@ -98,7 +98,7 @@ class Checker { const root = this.crate.rootDataset; return new CheckItem({ name: 'Root dataset has appropriate @id', - message: `The root dataset is './' `, + message: `The root dataset @id ends in "/"`, status: !!(root && root['@id'].endsWith('/')) }); } @@ -112,45 +112,63 @@ class Checker { }); } - - hasLicense() { - const root = this.crate.rootDataset; - const licenses = Utils.asArray(root?.license); - const check = new CheckItem({ - name: 'Has a license', - type: "ERROR", - message: "The Root dataset has a license property", - status: !!(licenses.length > 0 || false) - }); - - return check; - - } - hasDescription() { const root = this.crate.rootDataset; return new CheckItem({ name: 'Has description', message: 'The root Dataset has a description (http://schema.org/description)', - status: !!(root && root.description && root.description.length > 0), - type: "ERROR" + status: !!(root && root.description && root.description.length > 0) + }); + } + + hasAuthor() { + const root = this.crate.rootDataset; + const authors = Utils.asArray(root?.author).map(a => this.crate.getEntity(a['@id'])); + return new CheckItem({ + name: 'Has valid Authors', + message: 'The root Dataset has at least one Author (http://schema.org/author) referred to by @id, and all authors have @type Person (http://schema.org/Person) or Organization (http://schema.org/Organization)', + status: (authors.length > 0) && authors.every(a => includesTextCI(a?.['@type'], ['Person', 'Organization'])) }); } + hasLicense() { + const root = this.crate.rootDataset; + const licenses = Utils.asArray(root?.license).map(l => this.crate.getEntity(l['@id'])); + return new CheckItem({ + name: 'Has a license ', + message: 'The root Dataset has a License' + + licenses.map(license => license && license.name && license.description && + includesTextCI(license['@type'], 'CreativeWork') ? + ' (the license is a Creative Work with a name and description as it SHOULD be)' : '' + ).join(''), + status: (licenses.length > 0) + }); + } hasDatePublished() { const root = this.crate.rootDataset; var date = Utils.asArray(root?.datePublished); return new CheckItem({ name: 'Has a datePublished ', - message: 'The root Dataset has a datePublished with ONE value which is an ISO 8601 format', + message: 'The root Dataset has a datePublished with ONE value which is an ISO 8601 format precision of at least a day', diagnostics: date.length === 1 ? '' : `Number of datePublished values is ${date.length} NOT 1`, - status: !!(date.length === 1 && date[0].match(/^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/)), - type: "ERROR" + status: !!(date.length === 1 && date[0]?.match(/^\d{4}-([0]\d|1[0-2])-([0-2]\d|3[01])/)) }); } + hasContactPoint() { + const root = this.crate.rootDataset; + var contacts = Utils.asArray(root?.contactPoint).map(c => this.crate.getEntity(c['@id'])); + return new CheckItem({ + name: 'Has a contactPoint', + message: 'The root Dataset has at least one contactPoint property which references a ContactPoint of type Customer Service', + status: contacts.some(contact => contact && contact.email && + Utils.asArray(contact['@type']).includes('ContactPoint') && + Utils.asArray(contact.contactType).includes('customer service')) + }); + } + async check() { var checkNames = methods.filter(n => !(n in { hasContext: 0, hasAuthor: 0, hasContactPoint: 0 })); var context = await this.hasContext(); @@ -171,8 +189,8 @@ class Checker { report() { var report = []; for (var item of this.checklist) { - const tick = item.status ? '✔️ OK' : `❌ ${item.type}`; - report.push(`${tick} ${item.name}: ${item.message} ${item.diagnostics}`); + const tick = item.status ? '✔️' : '❌'; + report.push(`${tick} ${item.name}: ${item.message}`); } return report.join('\n'); } @@ -192,9 +210,8 @@ class CheckItem { constructor(data) { this.name = data.name; this.message = data.message; - this.type = data?.type || "WARNING" this.status = data.status ?? false; - this.diagnostics = data.diagnostics || ""; + if (data.diagnostics) this.diagnostics = data.diagnostics; } } diff --git a/test/checker.spec.js b/test/checker.spec.js index 9ba8252..0046c16 100644 --- a/test/checker.spec.js +++ b/test/checker.spec.js @@ -13,23 +13,6 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -*/ - - - -/* -The Root Data Entity MUST have the following properties: - -@type: MUST be [Dataset] or an array that contain Dataset -@id: SHOULD be the string ./ or an absolute URI (see below) -name: SHOULD identify the dataset to humans well enough to disambiguate it from other RO-Crates -description: SHOULD further elaborate on the name to provide a summary of the context in which the dataset is important. -datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond. -license: SHOULD link to a Contextual Entity or Data Entity in the RO-Crate Metadata Document with a name and description (see section on licensing). MAY, if necessary be a textual description of how the RO-Crate may be used. - - - - */ const assert = require("assert"); @@ -61,52 +44,79 @@ describe("Incremental checking", async function () { //var dataset = crate.getRootDataset(); var dataset = json["@graph"][0]; dataset.name = ""; - - var checker = new Checker(new ROCrate(json)); assert(!checker.hasName().status, "Does not have a name"); dataset.name = "Name!"; var checker = new Checker(new ROCrate(json)); assert(checker.hasName().status, "Does have a name"); + assert(!checker.hasAuthor().status, "Does not have author"); + // Author + var author1 = { + "@id": "http://orcid.org/some-orcid", + name: "Some Person", + }; + dataset.author = [{ "@id": "http://orcid.org/some-orcid" }]; + json["@graph"].push(author1); + var checker = new Checker(new ROCrate(json)); + assert( + !checker.hasAuthor().status, + "Does not have one or more authors with @type Person or Organization" + ); + // One good author and one dodgy one + var author2 = { + "@id": "http://orcid.org/some-other-orcid", + name: "Some Person", + "@type": "Person", + }; + dataset.author = [{ "@id": "http://orcid.org/some-orcid" }, { "@id": "http://orcid.org/some-other-orcid" }]; + json["@graph"].push(author1, author2); var checker = new Checker(new ROCrate(json)); - assert(!checker.hasDescription().status, "Does not have a description"); - dataset.description = "Description!"; + assert( + !checker.hasAuthor().status, + "Does not have one or more authors with @type Person or Organization" + ); + // One good author + dataset.author = [author2]; + json["@graph"] = [ + defaults.metadataFileDescriptorTemplate, + dataset, + author2, + ]; var checker = new Checker(new ROCrate(json)); - assert(checker.hasName().status, "Does have a description"); + assert( + checker.hasAuthor().status, + "Does have a author with @type Person or Organization" + ); // License // No name, description - console.log(checker.hasLicense()); assert( !checker.hasLicense().status, - "Has a license" + "Does not have a license with @type CreativeWork" ); - var license = { "@id": "http://example.com/some_kind_of_license", "@type": "CreativeWork", URL: "http://example.com/some_kind_of_license", }; dataset.license = { "@id": license["@id"] }; - json["@graph"].push(license); crate = new ROCrate(json); var checker = new Checker(crate); assert( checker.hasLicense().status, - "Has a license" + "Has a license with @type CreativeWork" ); - license.name = "Some license"; license.description = "Description of at least 20 characters."; assert( checker.hasLicense().status, - "Has a license" + "Does have a license with @type CreativeWork and a name and description" ); // datePublished @@ -114,14 +124,13 @@ describe("Incremental checking", async function () { !checker.hasDatePublished().status, "Does not have a datePublished" ); - - crate.rootDataset.datePublished = "2017"; // Not enough detail! assert( - checker.hasDatePublished().status, + !checker.hasDatePublished().status, + "Does not have a datePublished (not enough detail)" ); - crate.rootDataset.datePublished = ["2017-07-21", "2019-08-09"]; + crate.rootDataset.datePublished = ["2017-07-21", "2019-08-09"]; // this should do it assert( !checker.hasDatePublished().status, "Does not have a single datePublished" @@ -130,10 +139,32 @@ describe("Incremental checking", async function () { crate.rootDataset.datePublished = ["2017-07-21"]; // this should do it assert(checker.hasDatePublished().status, "Does have a datePublished"); - + //contactPoint missing + assert( + !checker.hasContactPoint().status, + "Does not have a single contact point" + ); + var contact = { + "@id": "some.email@example.com", + "@type": "ContactPoint", + }; // Not enough + dataset.contactPoint = [{ "@id": "some.email@example.com" }]; + json["@graph"].push(contact); + var checker = new Checker(new ROCrate(json)); + assert( + !checker.hasContactPoint().status, + "Does not have a contact point with enough properties" + ); + contact.contactType = "customer service"; + contact.email = "some@email"; // TODO: Not validated! + var checker = new Checker(new ROCrate(json)); + assert( + checker.hasContactPoint().status, + "Does have a proper contact point" + ); await checker.check(); - console.log(checker.report()); + //console.log(checker.report()); }); }); From 7eed523c8052ea52c639547657f4e01ed4a4977e Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 21 Sep 2023 16:09:18 +1000 Subject: [PATCH 04/14] Test passing --- lib/validator.js | 3 --- test/validator.spec.js | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/validator.js b/lib/validator.js index 5330c69..5387e8d 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -64,7 +64,6 @@ class Validator { try { this.json = JSON.parse(json); } catch (error) { - console.log(JSON); this.result.errors.push({ message: 'Crate is not JSON: ' + error, }); @@ -76,9 +75,7 @@ class Validator { try { this.crate = new ROCrate(this.json); - console.log('crate', this.crate, 'json', this.json); } catch (error) { - console.log('HAVE AN ERROR', error); this.result.errors.push({ message: 'ROCrate-js can not parse this JSON: ' + String(error), }); diff --git a/test/validator.spec.js b/test/validator.spec.js index 4a0c100..abd8412 100644 --- a/test/validator.spec.js +++ b/test/validator.spec.js @@ -71,7 +71,6 @@ describe("Incremental checking", async function () { json["@context"] = []; var validator = new Validator(json); await validator.hasContext(); - console.log(validator.result) assert(validator.result.errors.length === 0) assert(validator.result.warnings[0].message === "There is no reference to an 'official' RO-Crate @context"); @@ -80,8 +79,6 @@ describe("Incremental checking", async function () { json["@context"] = defaults.context; var validator = new Validator(json); - discoverInParentDirExactMatch; - // Don't have a dataset tho yet @@ -115,6 +112,7 @@ describe("Incremental checking", async function () { var validator = new Validator(crate.toJSON()); validator.rootDataEntity(); assert(validator.result.errors.length === 4); + assert(hasMessage(validator.result.errors, "Missing required property: license")) assert(hasMessage(validator.result.errors, "Missing required property: name")) assert(hasMessage(validator.result.errors, "Missing required property: description")) From 8aacdf449a1c11d8b8ba03ba2f98f49524180d96 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Fri, 22 Sep 2023 06:20:36 +1000 Subject: [PATCH 05/14] Added some simple JSON-LD checking -- still need to check flatness and compactness --- lib/validator.js | 54 +++++++++++++++++++++++++++++++++--------- test/validator.spec.js | 4 ++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/lib/validator.js b/lib/validator.js index 5387e8d..e39ef30 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -57,8 +57,23 @@ class Validator { } isJSON(json) { + /* + + The RO-Crate Metadata Document MUST contain RO-Crate JSON-LD; a valid [JSON-LD 1.0] document in [flattened] and [compacted] form + The RO-Crate JSON-LD SHOULD use the RO-Crate JSON-LD Context https://w3id.org/ro/crate/1.2-DRAFT/context by reference. + JSON-LD is a structured form of [JSON] that can represent a Linked Data graph. + + A valid RO-Crate JSON-LD graph MUST describe: + + The RO-Crate Metadata Descriptor + The Root Data Entity + Zero or more Data Entities + Zero or more Contextual Entities + */ + if (typeof json === 'object') { this.json = json; + return; } try { @@ -67,6 +82,31 @@ class Validator { this.result.errors.push({ message: 'Crate is not JSON: ' + error, }); + return; + } + var okSoFar = true; + const graph = this.json['@graph']; + if (!graph) { + this.result.errors.push({ + message: 'JSON Object not have a @graph', + clause: + 'The RO-Crate Metadata Document MUST contain RO-Crate JSON-LD; a valid [JSON-LD 1.0] document in [flattened] and [compacted] form', + }); + okSoFar = false; + } + for (let key of Object.keys(this.json)) { + if (!['@graph', '@context'].includes(key)) { + this.result.errors.push({ + message: 'JSON object contains keys other than @graph and @context', + clause: + 'The RO-Crate Metadata Document MUST contain RO-Crate JSON-LD; a valid [JSON-LD 1.0] document in [flattened] and [compacted] form', + }); + okSoFar = false; + } + } + + if (!okSoFar) { + this.json = null; // Don't try to parse this further as our RO-Crate library is overly permissive for this validation task } } @@ -186,10 +226,7 @@ class Validator { } } // Check the date - if ( - root?.datePublished && - root.datePublished.length > 1 - ) { + if (root?.datePublished && root.datePublished.length > 1) { this.result.errors.push({ entity: root['@id'], message: `datePublished must be a string, but multiple values have been supplied`, @@ -200,24 +237,19 @@ class Validator { if ( root?.datePublished && - (!root.datePublished.match( - /^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/ - )) + !root.datePublished.match(/^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/) ) { this.result.errors.push({ entity: root['@id'], message: `datePublished does not start with a compliant date in this format: YYYY, YYYY-MM or YYYY-MM-DD`, clause: `datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond.`, }); + } } -} - - async validate() { await this.hasContext(); this.rootDataEntity(); - } } diff --git a/test/validator.spec.js b/test/validator.spec.js index abd8412..b627def 100644 --- a/test/validator.spec.js +++ b/test/validator.spec.js @@ -57,6 +57,7 @@ function hasMessage(results, message, id) { describe("Incremental checking", async function () { + it("should trigger all the right reporting", async function () { var validator = new Validator("THIS IS NOT JSON IT IS A STRING"); assert(validator.result.errors[0].message === `Crate is not JSON: SyntaxError: Unexpected token 'T', "THIS IS NO"... is not valid JSON`) @@ -64,7 +65,10 @@ describe("Incremental checking", async function () { var validator = new Validator(JSON.stringify({"Something" : ["THIS IS JSON but RO-Crate will not like it one bit"]})); // TODO -- Actually - RO Crate does not care -- need to add some more validation :) + assert(hasMessage(validator.result.errors, "JSON Object not have a @graph")) + assert(hasMessage(validator.result.errors, "JSON object contains keys other than @graph and @context")) + assert(validator.result.errors.length === 2) this.timeout(10000); var crate = new ROCrate(); var json = crate.toJSON(); // should be a minimal viable datacrate From b3649004030b56ff88c58c73c339a7b7a13b9ee9 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Wed, 27 Sep 2023 09:15:59 +1000 Subject: [PATCH 06/14] First go at files validation --- lib/validator.js | 39 ++++++++++++++++++++++++++++++++----- package-lock.json | 3 ++- package.json | 4 +++- roc-validate.js | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 7 deletions(-) create mode 100755 roc-validate.js diff --git a/lib/validator.js b/lib/validator.js index e39ef30..4071157 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -73,7 +73,6 @@ class Validator { if (typeof json === 'object') { this.json = json; - return; } try { @@ -88,7 +87,7 @@ class Validator { const graph = this.json['@graph']; if (!graph) { this.result.errors.push({ - message: 'JSON Object not have a @graph', + message: 'JSON Object does not have a @graph', clause: 'The RO-Crate Metadata Document MUST contain RO-Crate JSON-LD; a valid [JSON-LD 1.0] document in [flattened] and [compacted] form', }); @@ -114,12 +113,41 @@ class Validator { if (!this.json) return; try { - this.crate = new ROCrate(this.json); + this.crate = new ROCrate(this.json, {array: true,link: true}); + console.log("got crate") } catch (error) { this.result.errors.push({ message: 'ROCrate-js can not parse this JSON: ' + String(error), }); } + } + checkFiles(files) { + + for (let file of Object.keys(files)) { + if (!this.crate.getEntity(file)) { + this.result.info.push({ + message: `File in crate directory is not included in the crate: ${file}` + }) + files[file].inCrate = false + } else { + files[file].inCrate = true + } + } + /* + for (let entity of this.crate.entities()) { + if (entity["@type"].includes("File") || entity["@type"].includes("Dataset")) { + if (!files?.[entity["@id"]]) { + files[entity["@id"]] = { + exists: false, + inCrate: true + } + } + } + + } + */ + + } async hasContext() { if (!this.json || !this.crate) return; @@ -229,7 +257,7 @@ class Validator { if (root?.datePublished && root.datePublished.length > 1) { this.result.errors.push({ entity: root['@id'], - message: `datePublished must be a string, but multiple values have been supplied`, + message: `datePublished must be a string, but multiple values have been supplied: ${root.datePublished} `, clause: `datePublished: MUST be a string in [ISO 8601 date format][DateTime] and SHOULD be specified to at least the precision of a day, MAY be a timestamp down to the millisecond.`, }); } @@ -237,7 +265,7 @@ class Validator { if ( root?.datePublished && - !root.datePublished.match(/^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/) + !root.datePublished[0].match(/^\d{4}-?([0]\d|1[0-2])?-?([0-2]\d|3[01])?/) ) { this.result.errors.push({ entity: root['@id'], @@ -250,6 +278,7 @@ class Validator { async validate() { await this.hasContext(); this.rootDataEntity(); + return true; } } diff --git a/package-lock.json b/package-lock.json index 4b6ed20..7cf4859 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,8 @@ "cross-fetch": "^3.1.5" }, "bin": { - "roccheck": "roc-checker.js" + "roccheck": "roc-checker.js", + "rocval": "roc-validate.js" }, "devDependencies": { "@types/mocha": "^10.0.1", diff --git a/package.json b/package.json index 0cc6437..626a98b 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,9 @@ "test-node18": "docker run -v `pwd`:`pwd` -w `pwd` node:18-alpine npm test" }, "bin": { - "roccheck": "roc-checker.js" + "roccheck": "roc-checker.js", + "rocval": "roc-validate.js" + }, "repository": { "type": "git", diff --git a/roc-validate.js b/roc-validate.js new file mode 100755 index 0000000..2faf12f --- /dev/null +++ b/roc-validate.js @@ -0,0 +1,49 @@ +#!/usr/bin/env node + + +const program = require('commander'); +const fs = require('fs/promises'); +const path = require('path'); +const {ROCrate} = require("./lib/rocrate"); +const {Validator} = require("./lib/validator"); + +var crateDir; + +program + .version("0.1.0") + .description( + "Runs a minimal RO-Crate validation" + ) + .arguments("") + .action((dir) => { crateDir = dir }) + + + +program.parse(process.argv); +const outPath = program.outputPath ? program.outputPath : crateDir; + + + +async function main() { + const rawJson = await fs.readFile(path.join(crateDir, "ro-crate-metadata.json"), 'utf8'); + + const validator = new Validator(rawJson); + await validator.validate() + + const files = await fs.readdir(crateDir, {recursive: true}) + const filesObj = Object.fromEntries(files.map(value => [value, {exists: true, inCrate: false}])) + console.log(filesObj) + + validator.checkFiles(filesObj); + console.log(console.log(validator.result.warnings)); + + var csvString = "file,exists,inCrate\n" + for (let key of Object.keys(filesObj)) { + csvString += `"${key.replace(/([,"])/g, "$1$1")}",${filesObj[key].exists},${filesObj[key].inCrate}\n` + } + fs.writeFile("file-summary.csv", csvString) + +} + +main(); + From 01ab6bf81a5a41664b0ed15529cad59983011aa6 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 28 Sep 2023 09:43:16 +1000 Subject: [PATCH 07/14] Basic commandline working --- lib/validator.js | 39 ++++++++++++++++++--------- roc-validate.js | 64 +++++++++++++++++++++++--------------------- validation/README.md | 15 +++++++++++ 3 files changed, 76 insertions(+), 42 deletions(-) create mode 100644 validation/README.md diff --git a/lib/validator.js b/lib/validator.js index 4071157..1cc2eb3 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -44,7 +44,7 @@ class Validator { * * @param {ROCrate} crate */ - constructor(json) { + constructor() { this.result = { errors: [], warnings: [], @@ -52,6 +52,9 @@ class Validator { }; this.json = null; this.crate = null; + } + + parseJSON(json) { this.isJSON(json); this.isCrate(); } @@ -114,41 +117,53 @@ class Validator { try { this.crate = new ROCrate(this.json, {array: true,link: true}); - console.log("got crate") } catch (error) { this.result.errors.push({ message: 'ROCrate-js can not parse this JSON: ' + String(error), }); } } - checkFiles(files) { - for (let file of Object.keys(files)) { + // Audit the files to see whether all the files in the crate are present and list those that are on disk but NOT in the crate + checkFiles(fileReferences, crate) { + if(crate) { + // Passing in a new crate + this.crate = crate; + } + // Check all the files that an external process has found by talking to the file system + for (let file of Object.keys(fileReferences)) { if (!this.crate.getEntity(file)) { this.result.info.push({ - message: `File in crate directory is not included in the crate: ${file}` + entity: file, + message: `Path in crate directory does not have a corresponding Data Entity in the crate` }) - files[file].inCrate = false + fileReferences[file].inCrate = false } else { - files[file].inCrate = true + fileReferences[file].inCrate = true } } - /* + for (let entity of this.crate.entities()) { if (entity["@type"].includes("File") || entity["@type"].includes("Dataset")) { - if (!files?.[entity["@id"]]) { - files[entity["@id"]] = { + if (!fileReferences?.[entity["@id"]]) { + fileReferences[entity["@id"]] = { exists: false, inCrate: true } + if (!isValidUrl(entity["@id"])) { + this.result.warnings.push({ + entity: entity["@id"], + message: `Data Entity in crate is not included in the crate directory` + }) } } } - */ - + + } } + async hasContext() { if (!this.json || !this.crate) return; diff --git a/roc-validate.js b/roc-validate.js index 2faf12f..952c899 100755 --- a/roc-validate.js +++ b/roc-validate.js @@ -1,49 +1,53 @@ #!/usr/bin/env node - const program = require('commander'); const fs = require('fs/promises'); const path = require('path'); -const {ROCrate} = require("./lib/rocrate"); -const {Validator} = require("./lib/validator"); +const { ROCrate } = require('./lib/rocrate'); +const { Validator } = require('./lib/validator'); var crateDir; program - .version("0.1.0") - .description( - "Runs a minimal RO-Crate validation" + .version('0.1.0') + .description('Runs a minimal RO-Crate validation') + .option('-f, --files ', + 'Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate.', ) - .arguments("") - .action((dir) => { crateDir = dir }) - - + .arguments('') + .action((dir) => { + crateDir = dir; + }) program.parse(process.argv); const outPath = program.outputPath ? program.outputPath : crateDir; - - async function main() { - const rawJson = await fs.readFile(path.join(crateDir, "ro-crate-metadata.json"), 'utf8'); - - const validator = new Validator(rawJson); - await validator.validate() - - const files = await fs.readdir(crateDir, {recursive: true}) - const filesObj = Object.fromEntries(files.map(value => [value, {exists: true, inCrate: false}])) - console.log(filesObj) - - validator.checkFiles(filesObj); - console.log(console.log(validator.result.warnings)); - - var csvString = "file,exists,inCrate\n" - for (let key of Object.keys(filesObj)) { - csvString += `"${key.replace(/([,"])/g, "$1$1")}",${filesObj[key].exists},${filesObj[key].inCrate}\n` + const rawJson = await fs.readFile( + path.join(crateDir, 'ro-crate-metadata.json'), + 'utf8' + ); + const validator = new Validator(); + validator.parseJSON(rawJson); + await validator.validate(); + + if (program.files) { + const files = await fs.readdir(crateDir, { recursive: true }); + // Initialise a files object which has all the files found in the crate + const filesObj = Object.fromEntries( + files.map((value) => [value, { exists: true, inCrate: false }]) + ); + validator.checkFiles(filesObj); + + var csvString = 'file,exists,inCrate\n'; + for (let key of Object.keys(filesObj)) { + csvString += `"${key.replace(/([,"])/g, '$1$1')}",${ + filesObj[key].exists + },${filesObj[key].inCrate}\n`; + } + fs.writeFile(program.files, csvString); } - fs.writeFile("file-summary.csv", csvString) - + console.log(validator.result) } main(); - diff --git a/validation/README.md b/validation/README.md new file mode 100644 index 0000000..0bddf5a --- /dev/null +++ b/validation/README.md @@ -0,0 +1,15 @@ +# Validation tools + +RO-Crate-js has a basic RO-Crate validation tool as part of the library. RO-Crate is a very relaxed and permissive specification, so there are only a few requirements that count as validation errors - though the validator will also issue some warnings and some summary information. + +## Install + +To use validation from the command line. + +- Clone this repository +- Install: `npm install .` +- Enable the scripts: `npm link .` + +## Usage + + From 92a2ad03ab6159e4c3b136b4c13365166d042d23 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 28 Sep 2023 10:05:13 +1000 Subject: [PATCH 08/14] Added README --- README.md | 1 + validation/README.md | 47 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cd3b799..5f5a937 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Install the library: - [**API documentation**](https://arkisto-platform.github.io/ro-crate-js/) - [**ROCrate documentation and specification**](https://www.researchobject.org/ro-crate/) +- [**Validate Crates**](validation) NEW!!! -- there is a new (2023-09) Validator class in development, we have a rudimentary command line interface to this -- documentation for how to include this as a library will follow soon. ## Usage diff --git a/validation/README.md b/validation/README.md index 0bddf5a..2737563 100644 --- a/validation/README.md +++ b/validation/README.md @@ -1,6 +1,6 @@ # Validation tools -RO-Crate-js has a basic RO-Crate validation tool as part of the library. RO-Crate is a very relaxed and permissive specification, so there are only a few requirements that count as validation errors - though the validator will also issue some warnings and some summary information. +RO-Crate-js has a basic RO-Crate validation tool as part of the library. RO-Crate is a very relaxed and permissive specification| so there are only a few requirements that count as validation errors - though the validator will also issue some warnings and some summary information. ## Install @@ -12,4 +12,49 @@ To use validation from the command line. ## Usage +To see the usage info: + +``` +>> rocval --help +Usage: rocval [options] + +Runs a minimal RO-Crate validation + +Options: + -V, --version output the version number + -f, --files Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate. + -h, --help output usage information + + +``` + +To get a summary CSV of which data entites are included or not included in the RO-Crate directory, type: + +``` +rocval -f file-summary.csv path/to/crate/ +``` + +The CSV file has three columns. + +1. `file`: is a path relative to the RO-Crate Root +2. `exists`: is a boolean with value `true` if the path exists on the file system and `false` if it does not +3. `inCrate`: is a boolean with value `true` if the path is the `@id` of and RO-Crate Data Entity and `false` if it is not. + +The following example shows the three possible combinations of these. + +The first is a directory that is on the file system but is not mentioned in the crate as a `Dataset` the second is a file which is present in the crate but not described. Neither of these is an error but this report might be of use in analysing crates -- maybe you do want to describe the `.sql` files in a crate or the directory they are in but forgot. + +The fourth line shows a file which IS included as a Data Entity in the crate but is not included in the crate directory. In most cases this would be considered an error. + +| file | exists | inCrate | +| --- | ---- | --- | +|.DS_Store | true | false | +ohrm/web/sql|true|false +ohrm/web/sql/EMELarc2013.sql|true|false +objects/thumbs/tn_BAPT_001_L.jpg|true|true +objects/thumbs/PaperCityImages/tn_PaperCity_Civic_Crook.png|false|true + + + + From 52e67494007a5d81083d257940feeb0cca6febb6 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 28 Sep 2023 10:07:09 +1000 Subject: [PATCH 09/14] Anonoymised examples --- validation/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validation/README.md b/validation/README.md index 2737563..2abff70 100644 --- a/validation/README.md +++ b/validation/README.md @@ -49,10 +49,10 @@ The fourth line shows a file which IS included as a Data Entity in the crate but | file | exists | inCrate | | --- | ---- | --- | |.DS_Store | true | false | -ohrm/web/sql|true|false -ohrm/web/sql/EMELarc2013.sql|true|false -objects/thumbs/tn_BAPT_001_L.jpg|true|true -objects/thumbs/PaperCityImages/tn_PaperCity_Civic_Crook.png|false|true +sql|true|false +sql/XYZ.sql|true|false +objects/thumbs/3453.jpg|true|true +objects/thumbs/G899/56h.png|false|true From 5a2eb4ea56c761e2cf3e5b58d8e3f6beeb2b7e69 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Thu, 28 Sep 2023 21:19:39 +1000 Subject: [PATCH 10/14] Improved README --- roc-validate.js | 19 ++++++++++++++----- validation/README.md | 11 +++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/roc-validate.js b/roc-validate.js index 952c899..473e7b4 100755 --- a/roc-validate.js +++ b/roc-validate.js @@ -11,9 +11,12 @@ var crateDir; program .version('0.1.0') .description('Runs a minimal RO-Crate validation') - .option('-f, --files ', - 'Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate.', + .option('-f, --files-path ', + 'Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate', ) + .option('-r, --report-path ', + 'Path to a JSON file into which the tool will write a json file containing errors, warnings and info' +) .arguments('') .action((dir) => { crateDir = dir; @@ -31,7 +34,7 @@ async function main() { validator.parseJSON(rawJson); await validator.validate(); - if (program.files) { + if (program.filesPath) { const files = await fs.readdir(crateDir, { recursive: true }); // Initialise a files object which has all the files found in the crate const filesObj = Object.fromEntries( @@ -45,9 +48,15 @@ async function main() { filesObj[key].exists },${filesObj[key].inCrate}\n`; } - fs.writeFile(program.files, csvString); + await fs.writeFile(program.filesPath, csvString); + } + if (program.reportPath) { + + await fs.writeFile(program.reportPath, JSON.stringify(validator.result, null, 2)); + + } else { + //console.log(SON.stringify(validator.result, null, 2)) } - console.log(validator.result) } main(); diff --git a/validation/README.md b/validation/README.md index 2abff70..6c3bf76 100644 --- a/validation/README.md +++ b/validation/README.md @@ -15,17 +15,16 @@ To use validation from the command line. To see the usage info: ``` ->> rocval --help + % rocval --help Usage: rocval [options] Runs a minimal RO-Crate validation Options: - -V, --version output the version number - -f, --files Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate. - -h, --help output usage information - - + -V, --version output the version number + -f, --files-path Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate + -r, --report-path Path to a JSON file into which the tool will write a json file containing errors, warnings and info + -h, --help output usage information ``` To get a summary CSV of which data entites are included or not included in the RO-Crate directory, type: From 4b67b5747969e99fce8efe29fba6b6767368a8b2 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Wed, 4 Oct 2023 12:56:34 +1100 Subject: [PATCH 11/14] Fixed tests --- test/validator.spec.js | 215 +++++++++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 85 deletions(-) diff --git a/test/validator.spec.js b/test/validator.spec.js index b627def..d0460da 100644 --- a/test/validator.spec.js +++ b/test/validator.spec.js @@ -15,8 +15,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ - - /* The Root Data Entity MUST have the following properties: ["@type", "@id", "name", "description", "datePublished", "license"] @@ -32,13 +30,12 @@ license: SHOULD link to a Contextual Entity or Data Entity in the RO-Crate Metad */ - -const assert = require("assert"); -const {Validator} = require("../lib/validator"); -const chai = require("chai"); -chai.use(require("chai-fs")); -const defaults = require("../lib/defaults"); -const {ROCrate} = require("../lib/rocrate"); +const assert = require('assert'); +const { Validator } = require('../lib/validator'); +const chai = require('chai'); +chai.use(require('chai-fs')); +const defaults = require('../lib/defaults'); +const { ROCrate } = require('../lib/rocrate'); function hasClause(results, rule, id) { if (id) { @@ -51,121 +48,169 @@ function hasMessage(results, message, id) { if (id) { return results.some((r) => r.message === message && r.entity === id); } + return results.some((r) => r.message === message); } - - -describe("Incremental checking", async function () { - - it("should trigger all the right reporting", async function () { - var validator = new Validator("THIS IS NOT JSON IT IS A STRING"); - assert(validator.result.errors[0].message === `Crate is not JSON: SyntaxError: Unexpected token 'T', "THIS IS NO"... is not valid JSON`) - assert(validator.crate === null) - - var validator = new Validator(JSON.stringify({"Something" : ["THIS IS JSON but RO-Crate will not like it one bit"]})); +describe('Incremental checking', async function () { + it('should trigger all the right reporting', async function () { + var validator = new Validator(); + validator.parseJSON('THIS IS NOT JSON IT IS A STRING'); + assert( + validator.result.errors[0].message === + `Crate is not JSON: SyntaxError: Unexpected token 'T', "THIS IS NO"... is not valid JSON` + ); + assert(validator.crate === null); + + var validator = new Validator(); + validator.parseJSON( + JSON.stringify({ + Something: ['THIS IS JSON but RO-Crate will not like it one bit'], + }) + ); // TODO -- Actually - RO Crate does not care -- need to add some more validation :) - assert(hasMessage(validator.result.errors, "JSON Object not have a @graph")) - assert(hasMessage(validator.result.errors, "JSON object contains keys other than @graph and @context")) - - assert(validator.result.errors.length === 2) + assert( + hasMessage(validator.result.errors, 'JSON Object does not have a @graph') + ); + assert( + hasMessage( + validator.result.errors, + 'JSON object contains keys other than @graph and @context' + ) + ); + + assert(validator.result.errors.length === 2); this.timeout(10000); var crate = new ROCrate(); var json = crate.toJSON(); // should be a minimal viable datacrate - json["@context"] = []; - var validator = new Validator(json); + json['@context'] = []; + var validator = new Validator(); + validator.parseJSON(json) await validator.hasContext(); - assert(validator.result.errors.length === 0) - assert(validator.result.warnings[0].message === "There is no reference to an 'official' RO-Crate @context"); - + assert(validator.result.errors.length === 0); + assert( + validator.result.warnings[0].message === + "There is no reference to an 'official' RO-Crate @context" + ); // Now with context - json["@context"] = defaults.context; - var validator = new Validator(json); - + json['@context'] = defaults.context; + var validator = new Validator(); + validator.parseJSON(json); // Don't have a dataset tho yet - // Check that the RootDatset exists var crate = new ROCrate(); - crate.rootDataset["@id"] = "Nothing special"; - var validator = new Validator(crate.toJSON()); - validator.rootDataEntity() + crate.rootDataset['@id'] = 'Nothing special'; + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); + validator.rootDataEntity(); - assert(hasMessage(validator.result.warnings, "Root Data Entity has appropriate @id. Is: Nothing special", "Nothing special")) - + assert( + hasMessage( + validator.result.warnings, + 'Root Data Entity has appropriate @id. Is: Nothing special', + 'Nothing special' + ) + ); // Check that the Root Data Entity has the right @type var crate = new ROCrate(); - crate.rootDataset["@type"] = ["Nothing", "Special"]; - var validator = new Validator(crate.toJSON()); + crate.rootDataset['@type'] = ['Nothing', 'Special']; + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); validator.rootDataEntity(); - assert(validator.result.errors[0].clause === "@type: MUST be [Dataset] or an array that contain Dataset"); - + assert( + validator.result.errors[0].clause === + '@type: MUST be [Dataset] or an array that contain Dataset' + ); + // Check that the Root Data Entity has the right Type -- change the context so it doesn't var crate = new ROCrate(); - crate.addContext({"Dataset": "some:dodgy-definiton-of-dataset"}); - var validator = new Validator(crate.toJSON()); + crate.addContext({ Dataset: 'some:dodgy-definiton-of-dataset' }); + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); validator.rootDataEntity(); - assert(validator.result.errors[0].clause === "@type: MUST be [Dataset] or an array that contain Dataset"); - - - // Check required props on Root Data Entity + assert( + validator.result.errors[0].clause === + '@type: MUST be [Dataset] or an array that contain Dataset' + ); + + // Check required props on Root Data Entity var crate = new ROCrate(); - var validator = new Validator(crate.toJSON()); + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); validator.rootDataEntity(); assert(validator.result.errors.length === 4); - assert(hasMessage(validator.result.errors, "Missing required property: license")) - assert(hasMessage(validator.result.errors, "Missing required property: name")) - assert(hasMessage(validator.result.errors, "Missing required property: description")) - assert(hasMessage(validator.result.errors, "Missing required property: datePublished")) - + assert( + hasMessage(validator.result.errors, 'Missing required property: license') + ); + assert( + hasMessage(validator.result.errors, 'Missing required property: name') + ); + assert( + hasMessage( + validator.result.errors, + 'Missing required property: description' + ) + ); + assert( + hasMessage( + validator.result.errors, + 'Missing required property: datePublished' + ) + ); // Check required props on Root Data Entity are properly defined -- and if the context is wrong then they are not var crate = new ROCrate(); - var validator = new Validator(crate.toJSON()); - + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); + crate.addContext({ - "name": "some:dodgy-definiton-of-name", - "license": "some:dodgy-definiton-of-license", - "description": "some:dodgy-definiton-of-description", - "datePublished": "some:dodgy-definiton-of-name" - - }); - crate.rootDataset.name = "name"; - crate.rootDataset.description = "description"; - crate.rootDataset.license = "license"; - crate.rootDataset.datePublished = "1983"; + name: 'some:dodgy-definiton-of-name', + license: 'some:dodgy-definiton-of-license', + description: 'some:dodgy-definiton-of-description', + datePublished: 'some:dodgy-definiton-of-name', + }); + crate.rootDataset.name = 'name'; + crate.rootDataset.description = 'description'; + crate.rootDataset.license = 'license'; + crate.rootDataset.datePublished = '1983'; validator.rootDataEntity(); assert(validator.result.errors.length === 4); - assert(hasMessage(validator.result.errors, "Missing required property: license")) - assert(hasMessage(validator.result.errors, "Missing required property: name")) - assert(hasMessage(validator.result.errors, "Missing required property: description")) - assert(hasMessage(validator.result.errors, "Missing required property: datePublished")) - - + assert( + hasMessage(validator.result.errors, 'Missing required property: license') + ); + assert( + hasMessage(validator.result.errors, 'Missing required property: name') + ); + assert( + hasMessage( + validator.result.errors, + 'Missing required property: description' + ) + ); + assert( + hasMessage( + validator.result.errors, + 'Missing required property: datePublished' + ) + ); + // Check required props on Root Data Entity are properly defined var crate = new ROCrate(); - crate.rootDataset.name = "name"; - crate.rootDataset.description = "description"; - crate.rootDataset.license = "bad license"; - crate.rootDataset.datePublished = "1983"; + crate.rootDataset.name = 'name'; + crate.rootDataset.description = 'description'; + crate.rootDataset.license = 'bad license'; + crate.rootDataset.datePublished = '1983'; validator.rootDataEntity(); - var validator = new Validator(crate.toJSON()); + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); assert(validator.result.errors.length === 0); - - - - - - - - - }); }); From ccdcbaa39d0eaaac3f16a763421d77878f2ee91f Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Mon, 9 Oct 2023 21:49:54 +1100 Subject: [PATCH 12/14] ADded isDir to files report --- lib/validator.js | 22 +++++++++++++++++++++- roc-validate.js | 28 +++++++++++++++------------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/lib/validator.js b/lib/validator.js index 1cc2eb3..a12ca17 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -130,7 +130,27 @@ class Validator { // Passing in a new crate this.crate = crate; } - // Check all the files that an external process has found by talking to the file system + + + for (let entity of this.crate.entities()) { + if (entity["@type"].includes("File") || entity["@type"].includes("Dataset")) { + if (!fileReferences?.[entity["@id"]]) { + fileReferences[entity["@id"]] = { + exists: false, + inCrate: true + } + + if (!isValidUrl(entity["@id"])) { + this.result.warnings.push({ + entity: entity["@id"], + message: `Data Entity in crate is not included in the crate directory` + }) + } + } + + } + } + for (let file of Object.keys(fileReferences)) { if (!this.crate.getEntity(file)) { this.result.info.push({ diff --git a/roc-validate.js b/roc-validate.js index 473e7b4..3f1dd6b 100755 --- a/roc-validate.js +++ b/roc-validate.js @@ -1,7 +1,7 @@ #!/usr/bin/env node const program = require('commander'); -const fs = require('fs/promises'); +const fs = require('fs'); const path = require('path'); const { ROCrate } = require('./lib/rocrate'); const { Validator } = require('./lib/validator'); @@ -26,34 +26,36 @@ program.parse(process.argv); const outPath = program.outputPath ? program.outputPath : crateDir; async function main() { - const rawJson = await fs.readFile( - path.join(crateDir, 'ro-crate-metadata.json'), - 'utf8' + const rawJson = fs.readFileSync( + path.join(crateDir, 'ro-crate-metadata.json') ); const validator = new Validator(); validator.parseJSON(rawJson); await validator.validate(); if (program.filesPath) { - const files = await fs.readdir(crateDir, { recursive: true }); + const files = fs.readdirSync(crateDir, { recursive: true }); // Initialise a files object which has all the files found in the crate + + console.log(files) + const filesObj = Object.fromEntries( - files.map((value) => [value, { exists: true, inCrate: false }]) + files.map((value) => [path.join(value), { exists: true, inCrate: false, isDir: fs.lstatSync(path.join(crateDir, value)).isDirectory() }]) ); + console.log(filesObj); + validator.checkFiles(filesObj); - var csvString = 'file,exists,inCrate\n'; + var csvString = 'file,exists,inCrate,isDir\n'; for (let key of Object.keys(filesObj)) { - csvString += `"${key.replace(/([,"])/g, '$1$1')}",${ + csvString += `"${key.replace(/(["])/g, '$1$1')}",${ filesObj[key].exists - },${filesObj[key].inCrate}\n`; + },${filesObj[key].inCrate},${filesObj[key].isDir}\n`; } - await fs.writeFile(program.filesPath, csvString); + fs.writeFileSync(program.filesPath, csvString); } if (program.reportPath) { - - await fs.writeFile(program.reportPath, JSON.stringify(validator.result, null, 2)); - + fs.writeFileSync(program.reportPath, JSON.stringify(validator.result, null, 2)); } else { //console.log(SON.stringify(validator.result, null, 2)) } From e567d0fc6de942d74af837229501a2c5fa4d52a9 Mon Sep 17 00:00:00 2001 From: Peter Sefton Date: Tue, 10 Oct 2023 21:16:40 +1100 Subject: [PATCH 13/14] Added some tests, and a feature to detect if a file is described in a parent directory --- lib/validator.js | 20 +++++++++++++++++++- roc-validate.js | 36 +++++++++++++++++++++++------------- test/validator.spec.js | 42 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/lib/validator.js b/lib/validator.js index a12ca17..c540d4c 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -131,7 +131,7 @@ class Validator { this.crate = crate; } - + // First check for (let entity of this.crate.entities()) { if (entity["@type"].includes("File") || entity["@type"].includes("Dataset")) { if (!fileReferences?.[entity["@id"]]) { @@ -152,6 +152,10 @@ class Validator { } for (let file of Object.keys(fileReferences)) { + // If we don't know the status of parent directory lets say is is not described + fileReferences[file].dirDescribed = fileReferences[file].dirDescribed || false; + // TODO -- check that type (File or Dir) of entity corresponds with the type of the thing on + if (!this.crate.getEntity(file)) { this.result.info.push({ entity: file, @@ -160,8 +164,22 @@ class Validator { fileReferences[file].inCrate = false } else { fileReferences[file].inCrate = true + if (fileReferences[file].isDir) { + // TODO Check type + + // This directory has an entry in the crate, so mark all its descendents as being described + for (let f of Object.keys(fileReferences)) { + if (f.startsWith(file)) { + fileReferences[f].dirDescribed = true; + } + } + } else { + // TODO Check type + + } } } + for (let entity of this.crate.entities()) { if (entity["@type"].includes("File") || entity["@type"].includes("Dataset")) { diff --git a/roc-validate.js b/roc-validate.js index 3f1dd6b..d39f222 100755 --- a/roc-validate.js +++ b/roc-validate.js @@ -11,16 +11,18 @@ var crateDir; program .version('0.1.0') .description('Runs a minimal RO-Crate validation') - .option('-f, --files-path ', - 'Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate', + .option( + '-f, --files-path ', + 'Path to a csv file into which the tool will write a summary of which files are in the crate directory and mentioned in the crate' + ) + .option( + '-r, --report-path ', + 'Path to a JSON file into which the tool will write a json file containing errors, warnings and info' ) - .option('-r, --report-path ', - 'Path to a JSON file into which the tool will write a json file containing errors, warnings and info' -) .arguments('') .action((dir) => { crateDir = dir; - }) + }); program.parse(process.argv); const outPath = program.outputPath ? program.outputPath : crateDir; @@ -37,25 +39,33 @@ async function main() { const files = fs.readdirSync(crateDir, { recursive: true }); // Initialise a files object which has all the files found in the crate - console.log(files) - const filesObj = Object.fromEntries( - files.map((value) => [path.join(value), { exists: true, inCrate: false, isDir: fs.lstatSync(path.join(crateDir, value)).isDirectory() }]) + files.map((value) => [ + path.join(value), + { + exists: true, + inCrate: false, + isDir: fs.lstatSync(path.join(crateDir, value)).isDirectory(), + }, + ]) ); - console.log(filesObj); validator.checkFiles(filesObj); + //console.log(filesObj) + var csvString = 'file,exists,inCrate,isDir,dirDescribed\n'; - var csvString = 'file,exists,inCrate,isDir\n'; for (let key of Object.keys(filesObj)) { csvString += `"${key.replace(/(["])/g, '$1$1')}",${ filesObj[key].exists - },${filesObj[key].inCrate},${filesObj[key].isDir}\n`; + },${filesObj[key].inCrate},${filesObj[key].isDir},${filesObj[key].dirDescribed}\n`; } fs.writeFileSync(program.filesPath, csvString); } if (program.reportPath) { - fs.writeFileSync(program.reportPath, JSON.stringify(validator.result, null, 2)); + fs.writeFileSync( + program.reportPath, + JSON.stringify(validator.result, null, 2) + ); } else { //console.log(SON.stringify(validator.result, null, 2)) } diff --git a/test/validator.spec.js b/test/validator.spec.js index d0460da..9ec89ea 100644 --- a/test/validator.spec.js +++ b/test/validator.spec.js @@ -214,6 +214,44 @@ describe('Incremental checking', async function () { }); }); -after(function () { - //TODO: destroy test repoPath + + + +describe('File Validation', async function () { + it('should trigger all the right reporting', async function () { + var validator = new Validator(); + + var crate = new ROCrate(); + crate.rootDataset.hasPart = {"@id": "/some/path/to/a/file.txt", "@type": "File"} + + var validator = new Validator(); + validator.parseJSON(crate.toJSON()); + var result = validator.validate(); + var files = {}; // Pretend we have no files + validator.checkFiles(files); + assert(!files["/some/path/to/a/file.txt"].exists) + assert(files["/some/path/to/a/file.txt"].inCrate) + + // Now add a directory + crate.rootDataset.hasPart = {"@id": "/some/path", "@type": "Dataset"} + // And file that aint in it + crate.rootDataset.hasPart = {"@id": "/someother/path/file", "@type": "File"} + + validator.parseJSON(crate.toJSON()); + validator.checkFiles(files); + files["/some/path"].isDir = true; + files["/some/path"].exists = true; + validator.checkFiles(files); + + assert(files["/some/path/to/a/file.txt"].dirDescribed) + assert(!files["/someother/path/file"].dirDescribed) + + + console.log(files); + + }); +}); + + + From 2214b76e48e2c8d5c2082c7e7f359e06f053ea1b Mon Sep 17 00:00:00 2001 From: Moises Sacal Date: Thu, 8 Aug 2024 09:56:42 +1000 Subject: [PATCH 14/14] updated readme, package version and mocha version --- README.md | 2 +- lib/validator.js | 3 +- package-lock.json | 115 +++++++++++++++++++++-------------------- package.json | 4 +- test/validator.spec.js | 2 +- 5 files changed, 64 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 0241379..a767902 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ fs.writeFileSync('ro-crate-metadata.json', JSON.stringify(crate, null, 2)); ``` For more usage examples, see the test files under the [test directory](test). -For more details, refer to the full [API documentation](https://arkisto-platform.github.io/ro-crate-js/). +For more details, refer to the full [API documentation](https://language-research-technology.github.io/ro-crate-js/). ## HTML Rendering diff --git a/lib/validator.js b/lib/validator.js index b3a1dde..24d8791 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -24,7 +24,8 @@ along with this program. If not, see . const defaults = require('./defaults'); const {Utils} = require('./utils'); const {ROCrate} = require('./rocrate'); -require('cross-fetch/polyfill'); // This for old versions of node. +const _g = typeof window === 'object' ? window : (typeof global === 'object' ? global : {}); +const fetch = _g.fetch || require('cross-fetch'); // errors, warnings, info diff --git a/package-lock.json b/package-lock.json index 3b76efb..73b2402 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,19 +1,20 @@ { "name": "ro-crate", - "version": "3.3.5", + "version": "3.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "ro-crate", - "version": "3.3.5", + "version": "3.4.0", "license": "GPL-3.0-or-later", "dependencies": { "commander": "^12.0.0", "cross-fetch": "^4.0.0" }, "bin": { - "roccheck": "roc-checker.js" + "roccheck": "roc-checker.js", + "rocval": "roc-validate.js" }, "devDependencies": { "@types/mocha": "^10.0.1", @@ -22,7 +23,7 @@ "chai": "^4.3.6", "chai-fs": "^2.0.0", "lodash": "^4.17.21", - "mocha": "^10.2.0", + "mocha": "^10.7.0", "typedoc": "^0.24.8", "uuid": "^8.3.2" }, @@ -46,9 +47,9 @@ } }, "node_modules/ansi-colors": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", "dev": true, "engines": { "node": ">=6" @@ -205,12 +206,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -406,9 +407,9 @@ } }, "node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz", + "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==", "dev": true, "dependencies": { "ms": "2.1.2" @@ -487,9 +488,9 @@ } }, "node_modules/diff": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", - "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==", + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", + "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", "dev": true, "engines": { "node": ">=0.3.1" @@ -562,9 +563,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -1048,9 +1049,9 @@ } }, "node_modules/minimatch": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.0.1.tgz", - "integrity": "sha512-nLDxIFRyhDblz3qMuq+SoRZED4+miJ/G+tdDrjkkkRnjAsBexeGpgjLEQ0blJy7rHhR2b93rhQY4SvyWu9v03g==", + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", "dev": true, "dependencies": { "brace-expansion": "^2.0.1" @@ -1060,31 +1061,31 @@ } }, "node_modules/mocha": { - "version": "10.4.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.4.0.tgz", - "integrity": "sha512-eqhGB8JKapEYcC4ytX/xrzKforgEc3j1pGlAXVy3eRwrtAy5/nIfT1SvgGzfN0XZZxeLq0aQWkOUAmqIJiv+bA==", - "dev": true, - "dependencies": { - "ansi-colors": "4.1.1", - "browser-stdout": "1.3.1", - "chokidar": "3.5.3", - "debug": "4.3.4", - "diff": "5.0.0", - "escape-string-regexp": "4.0.0", - "find-up": "5.0.0", - "glob": "8.1.0", - "he": "1.2.0", - "js-yaml": "4.1.0", - "log-symbols": "4.1.0", - "minimatch": "5.0.1", - "ms": "2.1.3", - "serialize-javascript": "6.0.0", - "strip-json-comments": "3.1.1", - "supports-color": "8.1.1", - "workerpool": "6.2.1", - "yargs": "16.2.0", - "yargs-parser": "20.2.4", - "yargs-unparser": "2.0.0" + "version": "10.7.0", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.7.0.tgz", + "integrity": "sha512-v8/rBWr2VO5YkspYINnvu81inSz2y3ODJrhO175/Exzor1RcEZZkizgE2A+w/CAXXoESS8Kys5E62dOHGHzULA==", + "dev": true, + "dependencies": { + "ansi-colors": "^4.1.3", + "browser-stdout": "^1.3.1", + "chokidar": "^3.5.3", + "debug": "^4.3.5", + "diff": "^5.2.0", + "escape-string-regexp": "^4.0.0", + "find-up": "^5.0.0", + "glob": "^8.1.0", + "he": "^1.2.0", + "js-yaml": "^4.1.0", + "log-symbols": "^4.1.0", + "minimatch": "^5.1.6", + "ms": "^2.1.3", + "serialize-javascript": "^6.0.2", + "strip-json-comments": "^3.1.1", + "supports-color": "^8.1.1", + "workerpool": "^6.5.1", + "yargs": "^16.2.0", + "yargs-parser": "^20.2.9", + "yargs-unparser": "^2.0.0" }, "bin": { "_mocha": "bin/_mocha", @@ -1311,9 +1312,9 @@ ] }, "node_modules/serialize-javascript": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", - "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", "dev": true, "dependencies": { "randombytes": "^2.1.0" @@ -1557,9 +1558,9 @@ } }, "node_modules/workerpool": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.1.tgz", - "integrity": "sha512-ILEIE97kDZvF9Wb9f6h5aXK4swSlKGUcOEGiIYb2OOu/IrDU9iwj0fD//SsA6E5ibwJxpEvhullJY4Sl4GcpAw==", + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", + "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", "dev": true }, "node_modules/wrap-ansi": { @@ -1613,9 +1614,9 @@ } }, "node_modules/yargs-parser": { - "version": "20.2.4", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.4.tgz", - "integrity": "sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA==", + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", "dev": true, "engines": { "node": ">=10" diff --git a/package.json b/package.json index 50351b4..aa0ba61 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ro-crate", - "version": "3.3.11", + "version": "3.4.0", "description": "Research Object Crate (RO-Crate) utilities for making and consuming crates", "main": "index.js", "scripts": { @@ -45,7 +45,7 @@ "chai": "^4.3.6", "chai-fs": "^2.0.0", "lodash": "^4.17.21", - "mocha": "^10.2.0", + "mocha": "^10.7.0", "typedoc": "^0.24.8", "uuid": "^8.3.2" }, diff --git a/test/validator.spec.js b/test/validator.spec.js index a753f70..9ce710b 100644 --- a/test/validator.spec.js +++ b/test/validator.spec.js @@ -251,7 +251,7 @@ describe('File Validation', async function () { console.log(files); - + }); });