-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug/1056 clinical download #721
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import { ADVERTISED_HOST } from 'config'; | ||
ciaranschutte marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import { get } from 'lodash'; | ||
import fetch from 'node-fetch'; | ||
import urljoin from 'url-join'; | ||
|
||
const url = urljoin(ADVERTISED_HOST, '/graphql'); | ||
|
||
const hitsQuery = ` | ||
query hitsQuery($filters: JSON, $first: Int) { | ||
file { | ||
hits(filters: $filters, first: $first) { | ||
edges { | ||
node { | ||
donors { | ||
hits { | ||
edges { | ||
node { | ||
donor_id | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
}`; | ||
|
||
const totalQuery = ` | ||
query totalQuery($filters: JSON) { | ||
file { | ||
hits(filters: $filters) { | ||
total | ||
} | ||
} | ||
}`; | ||
|
||
function makeRequest({ query, variables }: { query: string; variables: Record<string, unknown> }) { | ||
return JSON.stringify({ | ||
query, | ||
variables, | ||
}); | ||
} | ||
|
||
function createQueryClient({ requestConfig }: { requestConfig: Record<string, unknown> }) { | ||
return async ({ query }: { query: string }) => { | ||
try { | ||
const response = await fetch(url, { | ||
...requestConfig, | ||
body: query, | ||
}); | ||
const data = await response.json(); | ||
return data; | ||
} catch (error) { | ||
console.error(`Failed to query server`, error); | ||
} | ||
}; | ||
} | ||
|
||
async function queryArranger({ | ||
requestFilter, | ||
egoJwt, | ||
}: { | ||
requestFilter: Record<string, unknown>; | ||
egoJwt: string; | ||
}): Promise<string[]> { | ||
const requestConfig = { | ||
method: 'post', | ||
headers: { Authorization: `Bearer ${egoJwt}`, 'Content-Type': 'application/json' }, | ||
}; | ||
try { | ||
const queryClient = createQueryClient({ requestConfig }); | ||
const queryVariables = { filters: requestFilter }; | ||
// need two queries to pass "total" into hits filter to specify offset range | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure it's helpful, just sharing unfinished/raw thoughts I got from reading this: without sufficient context, this comment doesn't really add much clarity, neither of the intent nor the results. that is to say that line 75 is not exactly readable in terms of what the server will do with the values passed in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ...for instance, line 76 does say "this gets the total number of hits" -> a single absolute numerical value. clear. but not sure I'm making sense... but the code + comment seem opaque/arcane to me, is ultimately my point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. rereading these lines a few more times, I think the issue I'm having is in how the code is separated into different lines; and that forces variable creation for every step, which in turn brings out how naming things is hard... cognitive burden multiplied. so, suggestion: it occurs to me lines 75 - 76 could look more like the following (which I realize mixes then and await), but conceptually explains better the intent:
...maybe query client should also take a callback as an optional param?
this doesn't really explain why the "totalHits" is needed, which is then easier to describe in the comment |
||
const totalData = await queryClient({ query: makeRequest({ query: totalQuery, variables: queryVariables }) }); | ||
const totalHits = get(totalData, 'data.file.hits.total'); | ||
const hits = await queryClient({ | ||
query: makeRequest({ query: hitsQuery, variables: { ...queryVariables, first: totalHits } }), | ||
}); | ||
|
||
const donorEdgePath = 'data.file.hits.edges'; | ||
const records = get(hits, donorEdgePath, undefined); | ||
|
||
if (!records) { | ||
return []; | ||
} else { | ||
const donorIdPath = 'node.donors.hits.edges[0].node.donor_id'; | ||
return Array.from(new Set(records.map((record: Record<string, unknown>) => get(record, donorIdPath)))); | ||
} | ||
} catch (error) { | ||
throw Error(`Query Arranger failed.`, error); | ||
} | ||
} | ||
|
||
export default queryArranger; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,20 +5,16 @@ import * as express from 'express'; | |
import { z as zod } from 'zod'; | ||
|
||
import { ARRANGER_FILE_CENTRIC_INDEX } from 'config'; | ||
import authenticatedRequestMiddleware, { | ||
AuthenticatedRequest, | ||
} from 'routes/middleware/authenticatedRequestMiddleware'; | ||
import { | ||
hasDacoAccess, | ||
hasSufficientProgramMembershipAccess, | ||
} from 'routes/utils/accessValidations'; | ||
import authenticatedRequestMiddleware, { AuthenticatedRequest } from 'routes/middleware/authenticatedRequestMiddleware'; | ||
import { hasDacoAccess, hasSufficientProgramMembershipAccess } from 'routes/utils/accessValidations'; | ||
import { downloadDonorTsv } from 'services/clinical/api'; | ||
import { EgoClient } from 'services/ego'; | ||
import createClinicalApiAuthClient from 'services/ego/clinicalApiAuthClient'; | ||
import { EsHits } from 'services/elasticsearch'; | ||
import { EsFileCentricDocument } from 'utils/commonTypes/EsFileCentricDocument'; | ||
|
||
import logger from '../../utils/logger'; | ||
import queryArranger from './arranger'; | ||
|
||
const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => { | ||
const router = express.Router(); | ||
|
@@ -61,9 +57,7 @@ const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => | |
logger.debug( | ||
`[clinical-routes] /donors/data-for-files: User ${userId} requesting clinical data without DACO access`, | ||
); | ||
return res | ||
.status(403) | ||
.json({ error: 'Users require DACO approval to access clinical data.' }); | ||
return res.status(403).json({ error: 'Users require DACO approval to access clinical data.' }); | ||
} | ||
|
||
// 1. validate request body | ||
|
@@ -99,9 +93,7 @@ const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => | |
|
||
// 3. validations | ||
// 3a. all files were found | ||
const missingFiles = body.objectIds.filter( | ||
(objectId) => !files.find((doc) => doc.object_id === objectId), | ||
); | ||
const missingFiles = body.objectIds.filter((objectId) => !files.find((doc) => doc.object_id === objectId)); | ||
|
||
if (missingFiles.length > 0) { | ||
logger.info( | ||
|
@@ -127,9 +119,7 @@ const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => | |
}); | ||
} | ||
// 3c. user has access to each file | ||
const forbiddenFiles = files.filter( | ||
(file) => !hasSufficientProgramMembershipAccess({ scopes, file }), | ||
); | ||
const forbiddenFiles = files.filter((file) => !hasSufficientProgramMembershipAccess({ scopes, file })); | ||
if (forbiddenFiles.length > 0) { | ||
logger.info( | ||
`[clinical-routes] /donors/data-for-files: User ${userId} missing permissions to access requested files - ${forbiddenFiles.map( | ||
|
@@ -144,9 +134,7 @@ const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => | |
} | ||
|
||
// 4. fetch files for the donors of the files with data | ||
const donorIds = files | ||
.flatMap((file) => file.donors.map((donor) => donor.donor_id)) | ||
.filter((id) => !!id); | ||
const donorIds = files.flatMap((file) => file.donors.map((donor) => donor.donor_id)).filter((id) => !!id); | ||
|
||
const uniqueDonors = Array.from(new Set(donorIds)); | ||
|
||
|
@@ -172,6 +160,63 @@ const createClincalApiRouter = async (esClient: Client, egoClient: EgoClient) => | |
}); | ||
} | ||
}); | ||
|
||
router.use('/donors/data-for-all-files', async (req: AuthenticatedRequest, res) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if a filter is passed, this should be read as "'some' data about 'some' of the files". I think "all" here is either redundant or misleading, unless it actually means "'some' data for all the files", which is not immediately explicit in the name |
||
try { | ||
// Grab requester JWT | ||
const { scopes, userId, authenticated, egoJwt } = req.auth; | ||
|
||
if (!authenticated) { | ||
logger.debug(`[clinical-routes] /donors/data-for-all-files: Request not authenticated.`); | ||
return res.status(401).json({ error: 'Must be authenticated to access resource.' }); | ||
} | ||
const hasDaco = hasDacoAccess(scopes); | ||
if (!hasDaco) { | ||
logger.debug( | ||
`[clinical-routes] /donors/data-for-all-files: User ${userId} requesting clinical data without DACO access`, | ||
); | ||
return res.status(403).json({ error: 'Users require DACO approval to access clinical data.' }); | ||
} | ||
|
||
// Get request SQON filter | ||
const { filter: filterString }: { filter?: string } = req.query; | ||
let requestFilter = {}; | ||
try { | ||
requestFilter = filterString ? JSON.parse(filterString) : undefined; | ||
} catch (err) { | ||
res.status(400).send(`${filterString} is not a valid filter`); | ||
logger.error(err); | ||
throw err; | ||
} | ||
|
||
try { | ||
// query GQL Arranger | ||
// version 2.19 does not have direct access to Arranger apis directly | ||
const uniqueDonors = await queryArranger({ requestFilter, egoJwt }); | ||
|
||
// Return all files in a zip with a manifest | ||
const filename = `clinical_data_${Date.now()}`; | ||
|
||
const response = await downloadDonorTsv(clinicalAuthClient, uniqueDonors); | ||
res | ||
.status(200) | ||
.contentType('application/octet-stream') | ||
.setHeader('content-disposition', `filename=${filename}.zip`); | ||
return res.send(response); | ||
} catch (e) { | ||
logger.error(`Error retrieving clinical donor data from clinical-service ${e}`); | ||
return res.status(500).json({ | ||
error: e.message || 'An unexpected error occurred retrieving clinical file data.', | ||
}); | ||
} | ||
} catch (e) { | ||
logger.error(`[clinical-routes] /donors/data-for-all-files: unexpected error occurred: ${e}`); | ||
return res.status(500).json({ | ||
error: e.message || 'An unexpected error occurred retrieving clinical file data.', | ||
}); | ||
} | ||
}); | ||
|
||
return router; | ||
}; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thinking this may be a valid nuance?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thinking further on this, the "all" part of this endpoint implies to me no filtering should be possible. I mean why would a query give you partial results, unless it is due to pagination or hard limits?