Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove file length check pt 2. #148

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export default tseslint.config(
'*.mjs',
'example/*',
'src/htscodecs',
'coverage',
],
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/cramFile/codecs/_base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ export default abstract class CramCodec<
coreDataBlock: CramFileBlock,
blocksByContentId: Record<number, CramFileBlock>,
cursors: Cursors,
): DataTypeMapping[TResult]
): DataTypeMapping[TResult] | undefined
}
16 changes: 4 additions & 12 deletions src/cramFile/codecs/byteArrayLength.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,14 @@ export default class ByteArrayStopCodec extends CramCodec<
cursors: Cursors,
) {
const lengthCodec = this._getLengthCodec()
const arrayLength = lengthCodec.decode(
slice,
coreDataBlock,
blocksByContentId,
cursors,
)
const arrayLength =
lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0

const dataCodec = this._getDataCodec()
const data = new Uint8Array(arrayLength)
for (let i = 0; i < arrayLength; i += 1) {
data[i] = dataCodec.decode(
slice,
coreDataBlock,
blocksByContentId,
cursors,
)
data[i] =
dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0
}

return data
Expand Down
10 changes: 3 additions & 7 deletions src/cramFile/codecs/external.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import CramCodec, { Cursor, Cursors } from './_base'
import { CramMalformedError, CramUnimplementedError } from '../../errors'
import { CramUnimplementedError } from '../../errors'
import { CramFileBlock } from '../file'
import CramSlice from '../slice'
import { parseItf8 } from '../util'
Expand Down Expand Up @@ -39,13 +39,9 @@ export default class ExternalCodec extends CramCodec<
) {
const { blockContentId } = this.parameters
const contentBlock = blocksByContentId[blockContentId]
if (!contentBlock) {
throw new CramMalformedError(
`no block found with content ID ${blockContentId}}`,
)
}

const cursor = cursors.externalBlocks.getCursor(blockContentId)
return this._decodeData(contentBlock, cursor)
return contentBlock ? this._decodeData(contentBlock, cursor) : undefined
}

_decodeInt(contentBlock: CramFileBlock, cursor: Cursor) {
Expand Down
29 changes: 5 additions & 24 deletions src/cramFile/container/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,13 @@ export default class CramContainer {

// if there are no records in the container, there will be no compression
// header
if (!containerHeader?.numRecords) {
if (!containerHeader.numRecords) {
return null
}
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)

const block = await this.getFirstBlock()
if (block === undefined) {
return undefined
}
if (block.contentType !== 'COMPRESSION_HEADER') {
throw new CramMalformedError(
`invalid content type ${block.contentType} in compression header block`,
Expand All @@ -51,9 +48,6 @@ export default class CramContainer {

async getFirstBlock() {
const containerHeader = await this.getHeader()
if (!containerHeader) {
return undefined
}
return this.file.readBlock(containerHeader._endPosition)
}

Expand All @@ -78,12 +72,6 @@ export default class CramContainer {
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position >= fileSize) {
console.warn(`pos:${position}>=fileSize:${fileSize} in cram container`)
return undefined
}

// parse the container header. do it in 2 pieces because you cannot tell
// how much to buffer until you read numLandmarks
Expand All @@ -93,13 +81,6 @@ export default class CramContainer {
)
const header1 = parseItem(bytes1, cramContainerHeader1.parser)
const numLandmarksSize = itf8Size(header1.numLandmarks)
if (position + header1.length >= fileSize) {
// header indicates container goes beyond fileSize
console.warn(
`container at ${position} is beyond fileSize:${fileSize}, skipping`,
)
return undefined
}

const bytes2 = await this.file.read(
cramContainerHeader2.maxLength(header1.numLandmarks),
Expand All @@ -116,12 +97,12 @@ export default class CramContainer {
)
}

const completeHeader = Object.assign(header1, header2, {
return {
...header1,
...header2,
_size: header1._size + header2._size - numLandmarksSize,
_endPosition: header1._size + header2._size - numLandmarksSize + position,
})

return completeHeader
}
}
}

Expand Down
118 changes: 41 additions & 77 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,6 @@ export default class CramFile {
}
}

// can just stat this object like a filehandle
stat() {
return this.file.stat()
}

// can just stat this object like a filehandle
read(length: number, position: number) {
return this.file.read(length, position)
}
Expand All @@ -133,20 +127,17 @@ export default class CramFile {
}

const firstBlock = await firstContainer.getFirstBlock()
if (firstBlock === undefined) {
return parseHeaderText('')
} else {
const content = firstBlock.content
const dataView = new DataView(content.buffer)
const headerLength = dataView.getInt32(0, true)
const textStart = 4
const decoder = new TextDecoder('utf8')
const text = decoder.decode(
content.subarray(textStart, textStart + headerLength),
)
this.header = text
return parseHeaderText(text)
}

const content = firstBlock.content
const dataView = new DataView(content.buffer)
const headerLength = dataView.getInt32(0, true)
const textStart = 4
const decoder = new TextDecoder('utf8')
const text = decoder.decode(
content.subarray(textStart, textStart + headerLength),
)
this.header = text
return parseHeaderText(text)
}

async getHeaderText() {
Expand All @@ -158,35 +149,26 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
let position = sectionParsers.cramFileDefinition.maxLength
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

// skip with a series of reads to the proper container
let currentContainer: CramContainer | undefined
for (let i = 0; i <= containerNumber; i++) {
// if we are about to go off the end of the file
// and have not found that container, it does not exist
if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
return undefined
}
// if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
// return undefined
// }

currentContainer = this.getContainerAtPosition(position)
const currentHeader = await currentContainer.getHeader()
if (!currentHeader) {
throw new CramMalformedError(
`container ${containerNumber} not found in file`,
)
}

// if this is the first container, read all the blocks in the container
// to determine its length, because we cannot trust the container
// header's given length due to a bug somewhere in htslib
if (i === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
if (block === undefined) {
return undefined
}
position = block._endPosition
}
} else {
Expand Down Expand Up @@ -219,39 +201,41 @@ export default class CramFile {

/**
* @returns {Promise<number | undefined>} the number of containers in the file
*
* Note: this is currently used only in unit tests. Since the file length check
* was removed, it relies on a try/catch: the read error thrown at the end of
* the file is what breaks out of the loop.
*/
async containerCount(): Promise<number | undefined> {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

let containerCount = 0
let position = sectionParsers.cramFileDefinition.maxLength
while (position + cramContainerHeader1.maxLength + 8 < fileSize) {
const currentHeader =
await this.getContainerAtPosition(position).getHeader()
if (!currentHeader) {
break
}
// if this is the first container, read all the blocks in the container,
// because we cannot trust the container header's given length due to a
// bug somewhere in htslib
if (containerCount === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
if (block === undefined) {
return undefined
try {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
while (true) {
const currentHeader =
await this.getContainerAtPosition(position).getHeader()

// if this is the first container, read all the blocks in the container,
// because we cannot trust the container header's given length due to a
// bug somewhere in htslib
if (containerCount === 0) {
position = currentHeader._endPosition
for (let j = 0; j < currentHeader.numBlocks; j++) {
const block = await this.readBlock(position)
position = block._endPosition
}
position = block._endPosition
} else {
// otherwise, just traverse to the next container using the container's
// length
position += currentHeader._size + currentHeader.length
}
} else {
// otherwise, just traverse to the next container using the container's
// length
position += currentHeader._size + currentHeader.length
containerCount += 1
}
containerCount += 1
} catch (e) {
containerCount--
/* error intentionally swallowed: a failed read is treated as the end of the file and simply ends the count */
}

return containerCount
Expand All @@ -265,11 +249,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramBlockHeader } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position + cramBlockHeader.maxLength >= fileSize) {
return undefined
}

const buffer = await this.file.read(cramBlockHeader.maxLength, position)
return parseItem(buffer, cramBlockHeader.parser, 0, position)
Expand All @@ -287,16 +266,7 @@ export default class CramFile {
size = section.maxLength,
preReadBuffer?: Uint8Array,
) {
let buffer: Uint8Array
if (preReadBuffer) {
buffer = preReadBuffer
} else {
const { size: fileSize } = await this.file.stat()
if (position + size >= fileSize) {
return undefined
}
buffer = await this.file.read(size, position)
}
const buffer = preReadBuffer ?? (await this.file.read(size, position))
const data = parseItem(buffer, section.parser, 0, position)
if (data._size !== size) {
throw new CramMalformedError(
Expand Down Expand Up @@ -356,9 +326,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const blockHeader = await this.readBlockHeader(position)
if (blockHeader === undefined) {
return undefined
}
const blockContentPosition = blockHeader._endPosition

const d = await this.file.read(
Expand Down Expand Up @@ -386,9 +353,6 @@ export default class CramFile {
sectionParsers.cramBlockCrc32,
blockContentPosition + blockHeader.compressedSize,
)
if (crc === undefined) {
return undefined
}
block.crc32 = crc.crc32

// check the block data crc32
Expand Down
2 changes: 1 addition & 1 deletion src/cramFile/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ export default class CramRecord {

this.readGroupId = readGroupId
this.readName = readName
this.sequenceId = sequenceId
this.sequenceId = sequenceId!
this.uniqueId = uniqueId
this.templateSize = templateSize
this.alignmentStart = alignmentStart
Expand Down
Loading
Loading