diff --git a/Notes.md b/Notes.md
index ec2e498..cea8ee5 100644
--- a/Notes.md
+++ b/Notes.md
@@ -1,10 +1,10 @@
 # Task in Refactoring
 
-* datamodel/typesystem.ts should be types.ts and have all exported types
+* _DONE_ datamodel/typesystem.ts should be types.ts and have all exported types
   and only have types, currently it has some functions and other types are
-  defined elsewhere
+  defined elsewhere.
 
-* External modules that are imported - need a way to handle these
+* _DONE_ External modules that are imported - need a way to handle these
   * getDataDB from 'sync' - returns the dataDB for a project_id
   * shouldDisplayRecord from 'users' - used to filter records, might be best
     to move this inside this library since it will be useful
@@ -19,26 +19,49 @@
   (unless we also want users in conductor).
 
 * in typesystem.ts we have isAttachment which checks to see if it's argument
-  is an instance of Blob. Blob isn't there in node by default so we will need
+  is an instance of Blob. It's only used in the equality comparator in that file.
+  Blob isn't there in node by default so we will need
   a better test that will work on both browser and node. Initially just having
-  that fn return false to get the tests to run
-
-* the exported interface 'option' could do with a better name!
+  that fn return false to get the tests to run
 
 * data_storage/validation.ts is about form validation and relies on getting
   the uiSpec for a notebook via modules not included here. Remove it from
   here for now but consider pulling all of the metadata manipulation code
   into here at some later time
 
-* to get a list of records we have getRecordsWithRegex (data_storage/index.ts)
+* to get a list of records we have getRecordsWithRegex (data_storage/index.ts) and
   getAllRecordsWithRegex (data_storage/queries.ts). getRecords calls getAllRecords
   and then calls filterRecordMetadata to optionally remove deleted records and those
-  that shouldn't be visible to the user (shouldDisplayRecord).  Both are
+  that shouldn't be visible to the user (shouldDisplayRecord). Both are
   used in FAIMS but surely only getRecordsWithRegex should be used because we
   always want to remove hidden records...actually the use of getAll is in my
   code to navigate via QR code so is not correct. getAll should not be exported
   here and that QR code (in gui/components/notebook/add_record_by_type.tsx)
   should be modified.
-
+* getMetadataForAllRecords is called in FAIMS3 to get the list of records to
+  display in the main data grid; it is slow with a large number of records even
+  if only one page will be displayed. Let's see what it does...
+
+  getMetadataForAllRecords: Promise
+  * listRecordMetadata - to get a full list of record metadata
+    * if no list of record ids is passed, call getAllRecords to get one;
+      getAllRecords is a simple query to the DB for record_format_version: 1
+    * get the revisions for the list of record ids (getRevisions)
+    * get the HRID for each record (getHRID)
+  * filterRecordMetadata - to remove deleted and inaccessible records
+    * deleted is a property on the frev record but we first retrieve
+      all records and then filter on this rather than doing it in a query
+    * shouldDisplayRecord is called for each record, in FAIMS3 it returns
+      true unless the user does not have permissions on the cluster
+      * does a db query for the user info **for every record**
+      * parses the JWT to get permissions **for every record** (actually more than once)
+        (see the sketch below for hoisting these lookups out of the per-record loop)
+  getRecordsWithRegex: Promise
+  * getAllRecordsWithRegex
+    * query the db for all records with avp_format_version: 1 (all avp records?)
+      * which is 'find an avp that matches this regexp somewhere'
+    * filtered by the regex
+    * generate a list of record ids from this query, deduplicate it
+    * pass to listRecordMetadata to get the actual records
+  * filterRecordMetadata
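The expensive part called out in the notes above is that shouldDisplayRecord re-fetches the user document and re-parses the JWT for every record. A minimal sketch of hoisting that work out of the loop, resolving the caller's permissions once and reusing them while filtering; `getUserProjectRoles`, the role name and the exact `RecordMetadata` fields used here are illustrative assumptions, not the existing FAIMS3 API:

```typescript
// Sketch only: do the user lookup and token parse once per call, not per record.
type RecordMetadata = {record_id: string; created_by: string; deleted: boolean};

async function filterRecordMetadataOnce(
  records: RecordMetadata[],
  username: string,
  project_id: string,
  // hypothetical helper standing in for the per-record user/JWT lookup
  getUserProjectRoles: (user: string, project: string) => Promise<Set<string>>
): Promise<RecordMetadata[]> {
  const roles = await getUserProjectRoles(username, project_id); // one lookup
  const canSeeAll = roles.has('cluster-admin');
  return records.filter(
    r => !r.deleted && (canSeeAll || r.created_by === username)
  );
}
```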
diff --git a/package-lock.json b/package-lock.json
index 48e9ee5..2147864 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "faims3-datamodel",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "faims3-datamodel",
-      "version": "1.0.4",
+      "version": "1.1.0",
       "license": "Apache",
       "dependencies": {
         "@demvsystems/yup-ast": "^1.2.2",
@@ -27,7 +27,7 @@
         "pouchdb-adapter-memory": "^8.0.1",
         "pouchdb-find": "^8.0.1",
         "ts-jest": "^29.0.5",
-        "typescript": "^4.9.5"
+        "typescript": "^5.3.2"
       },
       "peerDependencies": {
         "pouchdb": "^7.3.0"
@@ -6515,16 +6515,16 @@
       }
     },
     "node_modules/typescript": {
-      "version": "4.9.5",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz",
-      "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==",
+      "version": "5.3.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.2.tgz",
+      "integrity": "sha512-6l+RyNy7oAHDfxC4FzSJcz9vnjTKxrLpDG5M2Vu4SHRVNg6xzqZp6LYSR9zjqQTu8DU/f5xwxUdADOkbrIX2gQ==",
       "dev": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
       },
       "engines": {
-        "node": ">=4.2.0"
+        "node": ">=14.17"
       }
     },
     "node_modules/universalify": {
@@ -11736,9 +11736,9 @@
       }
     },
     "typescript": {
-      "version": "4.9.5",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz",
-      "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==",
+      "version": "5.3.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.2.tgz",
+      "integrity": "sha512-6l+RyNy7oAHDfxC4FzSJcz9vnjTKxrLpDG5M2Vu4SHRVNg6xzqZp6LYSR9zjqQTu8DU/f5xwxUdADOkbrIX2gQ==",
      "dev": true
     },
     "universalify": {
diff --git a/package.json b/package.json
index 07b92f5..2274a5c 100644
--- a/package.json
+++ b/package.json
@@ -45,6 +45,6 @@
     "pouchdb-adapter-memory": "^8.0.1",
     "pouchdb-find": "^8.0.1",
     "ts-jest": "^29.0.5",
-    "typescript": "^4.9.5"
+    "typescript": "^5.3.2"
   }
 }
diff --git a/src/data_storage/attachments.ts b/src/data_storage/attachments.ts
index 137aadf..984d935 100644
--- a/src/data_storage/attachments.ts
+++ b/src/data_storage/attachments.ts
@@ -109,6 +109,7 @@ export function attachment_to_file(
 ): File {
   const content_type = attachment.content_type;
   const data = (attachment as PouchDB.Core.FullAttachment).data;
+  // console.log('a2f', name, content_type, data);
   return new File([data], name, {type: content_type});
 }
diff --git a/src/data_storage/databases.ts b/src/data_storage/databases.ts
index a9becb5..d1b0dfe 100644
--- a/src/data_storage/databases.ts
+++ b/src/data_storage/databases.ts
@@ -104,6 +104,14 @@ export const addDesignDocsForNotebook = async (
         }
       }`,
       },
+      avpAttachment: {
+        map: `function (doc) {
+          if (doc.avp_format_version === 1 && doc.faims_attachments)
+            doc.faims_attachments.forEach((att) => {
+              emit(doc._id, {_id: att.attachment_id});
+            });
+        }`,
+      },
       revision: {
         map: 'function (doc) {\n  if (doc.revision_format_version === 1) \n    emit(doc._id, 1);\n}',
       },
@@ -126,7 +134,7 @@ export const addDesignDocsForNotebook = async (
       documents[i]['_rev'] = existing['_rev'];
       // are they the same?
       if (isEqualObjects(existing, documents[i])) {
-        return;
+        continue;
       }
     }
     await dataDB.put(documents[i]);
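The `avpAttachment` view added above relies on PouchDB's linked-documents behaviour: because the map function emits `{_id: att.attachment_id}`, querying the view with `include_docs: true` resolves each row's `doc` to the attachment document itself rather than the avp that emitted it. A rough usage sketch (the design document name `index` matches the other views queried in this change; the function name is illustrative):

```typescript
// Fetch attachment documents for a set of avp ids via the avpAttachment view.
async function getAttachmentDocsForAvps(
  dataDB: PouchDB.Database,
  avpIds: string[]
) {
  const result = await dataDB.query('index/avpAttachment', {
    keys: avpIds,        // one row per attachment on each matching avp
    include_docs: true,  // resolve the emitted {_id: ...} link to the attachment doc
    attachments: true,   // inline the binary attachment data as base64
  });
  // row.key is the avp id that emitted the row, row.doc is the attachment document
  return result.rows.map((row: any) => ({avp_id: row.key, attachment: row.doc}));
}
```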
diff --git a/src/data_storage/index.ts b/src/data_storage/index.ts
index ca88ab1..a15c0b4 100644
--- a/src/data_storage/index.ts
+++ b/src/data_storage/index.ts
@@ -27,7 +27,7 @@
 import {v4 as uuidv4} from 'uuid';
-import {getDataDB} from '../index';
+import {file_attachments_to_data, getDataDB} from '../index';
 import {DEFAULT_RELATION_LINK_VOCABULARY} from '../datamodel/core';
 import {
   FAIMSTypeName,
@@ -53,7 +53,7 @@ import {
   getHRID,
   listRecordMetadata,
 } from './internals';
-import {getAllRecordsOfType, getAllRecordsWithRegex} from './queries';
+import {getAllRecordsOfType, getAllRecordsWithRegex, queryAsMap} from './queries';
 import {logError} from '../logging';
 
 export function generateFAIMSDataID(): RecordID {
@@ -507,6 +507,7 @@ const hydrateRecord = async (
     project_id,
     record.revision
   );
+
   const result = {
     project_id: project_id,
     record_id: record.record_id,
@@ -543,20 +544,84 @@ export async function getSomeRecords(
     options.startkey = bookmark;
   }
   try {
-    const res = await dataDB.query('index/recordRevisions', options);
-    let record_list = res.rows.map((doc: any) => {
-      return {
-        record_id: doc.id,
-        revision_id: doc.value._id,
-        created: doc.value.created,
-        conflict: doc.value.conflict,
-        type: doc.value.type,
-        revision: doc.doc,
+    // get a list of revisions
+    const revisions = await dataDB.query('index/recordRevisions', options);
+
+    // collect the avp keys in these revisions
+    let avpKeys: string[] = [];
+    revisions.rows.map((row: any) => {
+      avpKeys = avpKeys.concat(Object.values(row.doc.avps));
+    });
+
+    // now grab the attachments for these avps
+    const attOptions = {
+      include_docs: true,
+      attachments: true,
+      keys: avpKeys,
+    };
+
+    const attachments = await queryAsMap(dataDB, 'index/avpAttachment', attOptions);
+    const avps = await queryAsMap(dataDB, 'index/avp', {include_docs: true, keys: avpKeys});
+
+    let record_list = revisions.rows.map((row: any) => {
+      const doc = row.doc;
+      const data: {[key: string]: any} = {};
+      const annotations: {[key: string]: any} = {};
+      const types: {[key: string]: string} = {};
+
+      Object.keys(doc.avps).map((key: string) => {
+        const avp = avps.get(doc.avps[key]);
+        if (avp.data != undefined) {
+          data[key] = avp.data;
+        } else {
+          // we might have an attachment
+          if (avp.faims_attachments) {
+            const attachmentList: any[] = [];
+            avp.faims_attachments.forEach((a: any) => {
+              const attachment = attachments.get(a.attachment_id);
+              if (attachment) {
+                attachmentList.push(attachment);
+              }
+            });
+            data[key] = file_attachments_to_data(avp, attachmentList).data;
+          } else {
+            data[key] = '';
+          }
+        }
+        // get annotations
+        annotations[key] = avp.annotations;
+        // and types
+        types[key] = avp.type;
+      });
+
+      // get the HRID by finding a property starting with HRID_STRING
+      const hridKey = Object.keys(data).find((key: string) => {
+        return key.startsWith('HRID_STRING');
+      });
+
+      const result = {
+        project_id: project_id,
+        record_id: doc.record_id,
+        revision_id: doc._id,
+        created_by: doc.created_by,
+        updated: new Date(doc.created),
+        updated_by: doc.created_by,
+        deleted: doc.deleted ? true : false,
+        hrid: hridKey ? data[hridKey] : doc.record_id,
+        relationship: doc.relationship,
+        data: data,
+        annotations: annotations,
+        types: types,
+        created: new Date(row.value.created),
+        conflicts: row.value.conflict,
+        type: doc.type,
       };
+      return result;
     });
     if (filter_deleted) {
       record_list = record_list.filter((record: any) => {
-        return !record.revision.deleted;
+        return !record.deleted;
       });
     }
     // don't return the first record if we have a bookmark
@@ -580,12 +645,14 @@ export const notebookRecordIterator = async (
 ) => {
   const batchSize = 100;
   const getNextBatch = async (bookmark: string | null) => {
+    const mark = performance.now();
     const records = await getSomeRecords(
       project_id,
       batchSize,
       bookmark,
       filter_deleted
     );
+    console.log('getNextBatch', performance.now() - mark);
     // select just those in this view
     return records.filter((record: any) => {
       return record.type === viewID;
@@ -613,9 +680,8 @@ export const notebookRecordIterator = async (
         index = 1;
       }
     }
-    if (record) {
-      const data = await hydrateRecord(project_id, record);
-      return {record: data, done: false};
+    if (record) {
+      return {record: record, done: false};
     } else {
       return {record: null, done: true};
     }
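For context, the iterator that `getSomeRecords` now feeds is consumed roughly like this; it is the same pattern the new attachments test further down uses, with the project and view ids here being placeholders:

```typescript
// Iterate a notebook's records batch by batch. Each record already carries
// hrid, data, annotations and types, assembled inside getSomeRecords without
// a per-record hydrateRecord call.
async function logAllRecords(project_id: string, viewID: string) {
  const iterator = await notebookRecordIterator(project_id, viewID);
  let {record, done} = await iterator.next();
  while (record && !done) {
    console.log(record.hrid, record.type, record.data);
    ({record, done} = await iterator.next());
  }
}
```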
diff --git a/src/data_storage/internals.ts b/src/data_storage/internals.ts
index 073982c..cddce3a 100644
--- a/src/data_storage/internals.ts
+++ b/src/data_storage/internals.ts
@@ -522,6 +522,7 @@ async function loadAttributeValuePair(
   project_id: ProjectID,
   avp: AttributeValuePair
 ): Promise {
+  const mark = performance.now();
   const attach_refs = avp.faims_attachments;
   if (attach_refs === null || attach_refs === undefined) {
     // No attachments
@@ -547,6 +548,7 @@ async function loadAttributeValuePair(
       attach_docs.push(doc);
     }
   });
+  console.log('loadAVP', performance.now() - mark);
   return loader(avp, attach_docs);
 }
diff --git a/src/data_storage/queries.ts b/src/data_storage/queries.ts
index 49024ab..4082ae5 100644
--- a/src/data_storage/queries.ts
+++ b/src/data_storage/queries.ts
@@ -73,6 +73,30 @@ export async function getAllRecordsWithRegex(
     return avp.record_id;
   });
   // Remove duplicates, no order is implied
-  const deduped_record_ids = Array.from(new Set(record_ids));
-  return await listRecordMetadata(project_id, deduped_record_ids);
+  const unique_record_ids = Array.from(new Set(record_ids));
+  return await listRecordMetadata(project_id, unique_record_ids);
 }
+
+/**
+ * Turn a query result (array of documents) into a Map keyed by document id
+ * so that we can access results by id rather than iterating over the array
+ * to find documents.
+ *
+ * @param db a PouchDB database
+ * @param index an index on the database
+ * @param options options to pass to db.query
+ * @returns a Map with document ids as keys and documents as values
+ */
+export const queryAsMap = async (
+  db: PouchDB.Database,
+  index: string,
+  options: PouchDB.Query.Options<{}, {}>
+) => {
+  const queryResult = await db.query(index, options);
+
+  const aMap = new Map();
+  queryResult.rows.forEach((row: any) => {
+    aMap.set(row.doc._id, row.doc);
+  });
+  return aMap;
+};
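A small usage sketch for `queryAsMap` as defined above; the avp ids are placeholders and `dataDB` comes from `getDataDB`, as elsewhere in this library:

```typescript
// Look avp documents up by id instead of scanning the query's rows array.
async function exampleLookup(project_id: string, avpIds: string[]) {
  const dataDB = await getDataDB(project_id);
  if (!dataDB) throw new Error('no data database for ' + project_id);
  const avps = await queryAsMap(dataDB, 'index/avp', {
    include_docs: true,
    keys: avpIds,
  });
  // undefined for any id that has no matching document
  return avpIds.map(id => avps.get(id));
}
```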
diff --git a/tests/data_storage.test.ts b/tests/data_storage.test.ts
index b604a73..69320ad 100644
--- a/tests/data_storage.test.ts
+++ b/tests/data_storage.test.ts
@@ -19,7 +19,14 @@
  */
 
 import {test, fc} from '@fast-check/jest';
-import {registerClient} from '../src';
+import {
+  file_attachments_to_data,
+  file_data_to_attachments,
+  getDataDB,
+  registerClient,
+  setAttachmentDumperForType,
+  setAttachmentLoaderForType,
+} from '../src';
 import {Record} from '../src/types';
 import {
   deleteFAIMSDataForID,
@@ -337,7 +344,7 @@ describe('record iterator', () => {
     const viewID = 'Test';
 
     // test below, at and above the batch size of the iterator
-    const sizes = [0, 50, 100, 220, 550];
+    const sizes = [0, 50, 100, 220];
     for (let i = 0; i < sizes.length; i++) {
       const n = sizes[i];
       await cleanDataDBS();
@@ -360,33 +367,123 @@ describe('record iterator', () => {
       expect(sumOfAges).toBe(Math.abs((n * (n - 1)) / 2));
     }
   });
+
+  test('attachments', async () => {
+    const viewID = 'Test';
+    const project_id = 'test';
+
+    setAttachmentLoaderForType(
+      'faims-attachment::Files',
+      file_attachments_to_data
+    );
+    setAttachmentDumperForType(
+      'faims-attachment::Files',
+      file_data_to_attachments
+    );
+
+    const attachmentText = 'this is my test attachment';
+    const n = 3;
+    await cleanDataDBS();
+    await createNRecords(project_id, viewID, n, attachmentText);
+
+    const iterator = await notebookRecordIterator(project_id, viewID);
+
+    let {record, done} = await iterator.next();
+
+    while (record && !done) {
+      expect(record.data.name.startsWith('Bob')).toBe(true);
+
+      // try to read the attachment data
+      record.data.textFile.forEach(async (file: File) => {
+        const data = Buffer.from(await file.text(), 'base64');
+        expect(data.toString()).toBe(attachmentText);
+      });
+
+      const next = await iterator.next();
+      record = next.record;
+      done = next.done;
+    }
+  });
+
 });
 
-// describe('record queries', () => {
-//   test('map-reduce query', async () => {
-//     const viewID = 'Test';
-//     const project_id = 'test';
-
-//     await cleanDataDBS();
-//     await createNRecords(project_id, viewID, 10);
-
-//     const db = await getDataDB(project_id);
-//     if (db) {
-//       const result = await db
-//         .query(
-//           {
-//             map: (doc: any, emit: CallableFunction) => {
-//               emit(doc.record_format_version, doc);
-//             },
-//           },
-//           {key: 1}
-//         )
-//         .catch((err: any) => {
-//           console.log('query failed', err);
-//         });
-//       console.log('query result: ', result);
-//     } else {
-//       fail('Failed to get database');
-//     }
-//   });
-// });
+describe('record queries', () => {
+
+  test.skip('map-reduce query', async () => {
+    const viewID = 'Test';
+    const project_id = 'test';
+
+    setAttachmentLoaderForType(
+      'faims-attachment::Files',
+      file_attachments_to_data
+    );
+    setAttachmentDumperForType(
+      'faims-attachment::Files',
+      file_data_to_attachments
+    );
+
+    const attachmentText = 'this is my test attachment';
+    await cleanDataDBS();
+    await createNRecords(project_id, viewID, 10, attachmentText);
+
+    const db = await getDataDB(project_id);
+    if (db) {
+      // get revisions via the revisions index
+      const revOptions: {[key: string]: any} = {
+        include_docs: true,
+        limit: 3,
+      };
+      const revisions = await db.query('index/recordRevisions', revOptions);
+      revisions.rows.forEach((row: any) => {
+        console.log(row.value, row.doc);
+      });
+      const revMap = new Map();
+      let avpKeys: string[] = [];
+      revisions.rows.map((row: any) => {
+        avpKeys = avpKeys.concat(Object.values(row.doc.avps));
+        revMap.set(row.doc.record_id, row.doc);
+      });
+
+      console.log('avps: ', avpKeys);
+
+      const avpResult = await db.query('index/avp', {
+        include_docs: true,
+        keys: avpKeys,
+      });
+      console.log('avp result: ', avpResult);
+
+      // get attachments via the avpAttachment index
+      const options = {
+        include_docs: true,
+        attachments: true,
+        keys: avpKeys,
+      };
+      const result = await db
+        .query('index/avpAttachment', options)
+        .catch((err: any) => {
+          console.log('query failed', err);
+        });
+      console.log('query result: ', result);
+
+      result.rows.forEach((row: any) => {
+        const record = revMap.get(row.doc.record_id);
+        if (record) {
+          if (record.attachments) {
+            record.attachments.push(row.doc);
+          } else {
+            record.attachments = [row.doc];
+          }
+        }
+        console.log(record);
+      });
+    } else {
+      fail('Failed to get database');
+    }
+  });
+});
diff --git a/tests/mocks.ts b/tests/mocks.ts
index 9f5bed1..41d0632 100644
--- a/tests/mocks.ts
+++ b/tests/mocks.ts
@@ -58,8 +58,25 @@ export const callbackObject: DBCallbackObject = {
 export const createRecord = async (
   project_id: string,
   viewID: string,
-  data: {name: string; age: number}
+  data: {name: string; age: number; textFile?: any},
+  attachment: string = ''
 ) => {
+  const fieldTypes: {name: string; age: string; textFile?: string} = {
+    name: 'faims::string',
+    age: 'faims::integer',
+  };
+
+  if (attachment != '') {
+    data['textFile'] = [
+      {
+        name: 'test_attachment.txt',
+        type: 'text/plain',
+        data: Buffer.from(attachment),
+      },
+    ];
+    fieldTypes['textFile'] = 'faims-attachment::Files';
+  }
+
   const userID = 'user';
   const doc: Record = {
     project_id: project_id,
@@ -72,10 +89,7 @@ export const createRecord = async (
     created: new Date(),
     updated: new Date(),
     annotations: {},
-    field_types: {
-      name: 'faims::string',
-      age: 'faims::integer',
-    },
+    field_types: fieldTypes,
     relationship: undefined,
     deleted: false,
   };
@@ -86,9 +100,15 @@ export const createRecord = async (
 export const createNRecords = async (
   project_id: string,
   viewID: string,
-  n: number
+  n: number,
+  attachment: string = ''
 ) => {
   for (let i = 0; i < n; i++) {
-    await createRecord(project_id, viewID, {name: `Bob ${i}`, age: i});
+    await createRecord(
+      project_id,
+      viewID,
+      {name: `Bob ${i}`, age: i},
+      attachment
+    );
   }
 };
diff --git a/tests/test-attachment-image.jpg b/tests/test-attachment-image.jpg
new file mode 100755
index 0000000..f2c6545
Binary files /dev/null and b/tests/test-attachment-image.jpg differ