Skip to content

Commit

Permalink
Remove file length check to help CORS
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Dec 19, 2024
1 parent bc33167 commit 08098e1
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 176 deletions.
2 changes: 1 addition & 1 deletion src/cramFile/codecs/_base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ export default abstract class CramCodec<
coreDataBlock: CramFileBlock,
blocksByContentId: Record<number, CramFileBlock>,
cursors: Cursors,
): DataTypeMapping[TResult]
): DataTypeMapping[TResult] | undefined
}
16 changes: 4 additions & 12 deletions src/cramFile/codecs/byteArrayLength.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,14 @@ export default class ByteArrayStopCodec extends CramCodec<
cursors: Cursors,
) {
const lengthCodec = this._getLengthCodec()
const arrayLength = lengthCodec.decode(
slice,
coreDataBlock,
blocksByContentId,
cursors,
)
const arrayLength =
lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0

const dataCodec = this._getDataCodec()
const data = new Uint8Array(arrayLength)
for (let i = 0; i < arrayLength; i += 1) {
data[i] = dataCodec.decode(
slice,
coreDataBlock,
blocksByContentId,
cursors,
)
data[i] =
dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
}

return data
Expand Down
10 changes: 3 additions & 7 deletions src/cramFile/codecs/external.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import CramCodec, { Cursor, Cursors } from './_base'
import { CramMalformedError, CramUnimplementedError } from '../../errors'
import { CramUnimplementedError } from '../../errors'
import { CramFileBlock } from '../file'
import CramSlice from '../slice'
import { parseItf8 } from '../util'
Expand Down Expand Up @@ -39,13 +39,9 @@ export default class ExternalCodec extends CramCodec<
) {
const { blockContentId } = this.parameters
const contentBlock = blocksByContentId[blockContentId]
if (!contentBlock) {
throw new CramMalformedError(
`no block found with content ID ${blockContentId}}`,
)
}

const cursor = cursors.externalBlocks.getCursor(blockContentId)
return this._decodeData(contentBlock, cursor)
return contentBlock ? this._decodeData(contentBlock, cursor) : undefined
}

_decodeInt(contentBlock: CramFileBlock, cursor: Cursor) {
Expand Down
29 changes: 5 additions & 24 deletions src/cramFile/container/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,13 @@ export default class CramContainer {

// if there are no records in the container, there will be no compression
// header
if (!containerHeader?.numRecords) {
if (!containerHeader.numRecords) {
return null
}
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)

const block = await this.getFirstBlock()
if (block === undefined) {
return undefined
}
if (block.contentType !== 'COMPRESSION_HEADER') {
throw new CramMalformedError(
`invalid content type ${block.contentType} in compression header block`,
Expand All @@ -51,9 +48,6 @@ export default class CramContainer {

async getFirstBlock() {
const containerHeader = await this.getHeader()
if (!containerHeader) {
return undefined
}
return this.file.readBlock(containerHeader._endPosition)
}

Expand All @@ -78,12 +72,6 @@ export default class CramContainer {
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position >= fileSize) {
console.warn(`pos:${position}>=fileSize:${fileSize} in cram container`)
return undefined
}

// parse the container header. do it in 2 pieces because you cannot tell
// how much to buffer until you read numLandmarks
Expand All @@ -93,13 +81,6 @@ export default class CramContainer {
)
const header1 = parseItem(bytes1, cramContainerHeader1.parser)
const numLandmarksSize = itf8Size(header1.numLandmarks)
if (position + header1.length >= fileSize) {
// header indicates container goes beyond fileSize
console.warn(
`container at ${position} is beyond fileSize:${fileSize}, skipping`,
)
return undefined
}

const bytes2 = await this.file.read(
cramContainerHeader2.maxLength(header1.numLandmarks),
Expand All @@ -116,12 +97,12 @@ export default class CramContainer {
)
}

const completeHeader = Object.assign(header1, header2, {
return {
...header1,
...header2,
_size: header1._size + header2._size - numLandmarksSize,
_endPosition: header1._size + header2._size - numLandmarksSize + position,
})

return completeHeader
}
}
}

Expand Down
118 changes: 41 additions & 77 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,6 @@ export default class CramFile {
}
}

// can just stat this object like a filehandle
stat() {
return this.file.stat()
}

// can just stat this object like a filehandle
read(length: number, position: number) {
return this.file.read(length, position)
}
Expand All @@ -133,20 +127,17 @@ export default class CramFile {
}

const firstBlock = await firstContainer.getFirstBlock()
if (firstBlock === undefined) {
return parseHeaderText('')
} else {
const content = firstBlock.content
const dataView = new DataView(content.buffer)
const headerLength = dataView.getInt32(0, true)
const textStart = 4
const decoder = new TextDecoder('utf8')
const text = decoder.decode(
content.subarray(textStart, textStart + headerLength),
)
this.header = text
return parseHeaderText(text)
}

const content = firstBlock.content
const dataView = new DataView(content.buffer)
const headerLength = dataView.getInt32(0, true)
const textStart = 4
const decoder = new TextDecoder('utf8')
const text = decoder.decode(
content.subarray(textStart, textStart + headerLength),
)
this.header = text
return parseHeaderText(text)
}

async getHeaderText() {
Expand All @@ -158,35 +149,26 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
let position = sectionParsers.cramFileDefinition.maxLength
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

// skip with a series of reads to the proper container
let currentContainer: CramContainer | undefined
for (let i = 0; i <= containerNumber; i++) {
// if we are about to go off the end of the file
// and have not found that container, it does not exist
if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
return undefined
}
// if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
// return undefined
// }

currentContainer = this.getContainerAtPosition(position)
const currentHeader = await currentContainer.getHeader()
if (!currentHeader) {
throw new CramMalformedError(
`container ${containerNumber} not found in file`,
)
}

// if this is the first container, read all the blocks in the container
// to determine its length, because we cannot trust the container
// header's given length due to a bug somewhere in htslib
if (i === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
if (block === undefined) {
return undefined
}
position = block._endPosition
}
} else {
Expand Down Expand Up @@ -219,39 +201,41 @@ export default class CramFile {

/**
* @returns {Promise[number]} the number of containers in the file
*
* note: this is currently used only in unit tests. Since the file length
* check was removed, it relies on the try/catch: an error thrown while
* reading is what breaks out of the loop
async containerCount(): Promise<number | undefined> {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

let containerCount = 0
let position = sectionParsers.cramFileDefinition.maxLength
while (position + cramContainerHeader1.maxLength + 8 < fileSize) {
const currentHeader =
await this.getContainerAtPosition(position).getHeader()
if (!currentHeader) {
break
}
// if this is the first container, read all the blocks in the container,
// because we cannot trust the container header's given length due to a
// bug somewhere in htslib
if (containerCount === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
if (block === undefined) {
return undefined
try {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
while (true) {
const currentHeader =
await this.getContainerAtPosition(position).getHeader()

// if this is the first container, read all the blocks in the container,
// because we cannot trust the container header's given length due to a
// bug somewhere in htslib
if (containerCount === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
position = block._endPosition
}
position = block._endPosition
} else {
// otherwise, just traverse to the next container using the container's
// length
position += currentHeader._size + currentHeader.length
}
} else {
// otherwise, just traverse to the next container using the container's
// length
position += currentHeader._size + currentHeader.length
containerCount += 1
}
containerCount += 1
} catch (e) {
containerCount--
/* do nothing */
}

return containerCount
Expand All @@ -265,11 +249,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramBlockHeader } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position + cramBlockHeader.maxLength >= fileSize) {
return undefined
}

const buffer = await this.file.read(cramBlockHeader.maxLength, position)
return parseItem(buffer, cramBlockHeader.parser, 0, position)
Expand All @@ -287,16 +266,7 @@ export default class CramFile {
size = section.maxLength,
preReadBuffer?: Uint8Array,
) {
let buffer: Uint8Array
if (preReadBuffer) {
buffer = preReadBuffer
} else {
const { size: fileSize } = await this.file.stat()
if (position + size >= fileSize) {
return undefined
}
buffer = await this.file.read(size, position)
}
const buffer = preReadBuffer ?? (await this.file.read(size, position))
const data = parseItem(buffer, section.parser, 0, position)
if (data._size !== size) {
throw new CramMalformedError(
Expand Down Expand Up @@ -356,9 +326,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const blockHeader = await this.readBlockHeader(position)
if (blockHeader === undefined) {
return undefined
}
const blockContentPosition = blockHeader._endPosition

const d = await this.file.read(
Expand Down Expand Up @@ -386,9 +353,6 @@ export default class CramFile {
sectionParsers.cramBlockCrc32,
blockContentPosition + blockHeader.compressedSize,
)
if (crc === undefined) {
return undefined
}
block.crc32 = crc.crc32

// check the block data crc32
Expand Down
2 changes: 1 addition & 1 deletion src/cramFile/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ export default class CramRecord {

this.readGroupId = readGroupId
this.readName = readName
this.sequenceId = sequenceId
this.sequenceId = sequenceId!
this.uniqueId = uniqueId
this.templateSize = templateSize
this.alignmentStart = alignmentStart
Expand Down
Loading

0 comments on commit 08098e1

Please sign in to comment.