From 57971d6cfb75882622aecd3dd4f77173d54f3506 Mon Sep 17 00:00:00 2001 From: Colin Date: Sat, 14 Dec 2024 15:50:27 -0500 Subject: [PATCH] Remove biome config --- src/cramFile/codecs/_base.ts | 2 +- src/cramFile/codecs/byteArrayLength.ts | 16 ++---- src/cramFile/codecs/external.ts | 8 +-- src/cramFile/container/index.ts | 26 ++-------- src/cramFile/file.ts | 69 ++++++++++++-------------- src/cramFile/record.ts | 2 +- src/cramFile/slice/decodeRecord.ts | 49 +++++++++--------- src/cramFile/slice/index.ts | 49 +++++++++--------- 8 files changed, 97 insertions(+), 124 deletions(-) diff --git a/src/cramFile/codecs/_base.ts b/src/cramFile/codecs/_base.ts index ecdf64e2..75b710fc 100644 --- a/src/cramFile/codecs/_base.ts +++ b/src/cramFile/codecs/_base.ts @@ -41,5 +41,5 @@ export default abstract class CramCodec< coreDataBlock: CramFileBlock, blocksByContentId: Record, cursors: Cursors, - ): DataTypeMapping[TResult] + ): DataTypeMapping[TResult] | undefined } diff --git a/src/cramFile/codecs/byteArrayLength.ts b/src/cramFile/codecs/byteArrayLength.ts index cce3f643..3407364c 100644 --- a/src/cramFile/codecs/byteArrayLength.ts +++ b/src/cramFile/codecs/byteArrayLength.ts @@ -32,22 +32,14 @@ export default class ByteArrayStopCodec extends CramCodec< cursors: Cursors, ) { const lengthCodec = this._getLengthCodec() - const arrayLength = lengthCodec.decode( - slice, - coreDataBlock, - blocksByContentId, - cursors, - ) + const arrayLength = + lengthCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0 const dataCodec = this._getDataCodec() const data = new Uint8Array(arrayLength) for (let i = 0; i < arrayLength; i += 1) { - data[i] = dataCodec.decode( - slice, - coreDataBlock, - blocksByContentId, - cursors, - ) + data[i] = + dataCodec.decode(slice, coreDataBlock, blocksByContentId, cursors) || 0 } return data diff --git a/src/cramFile/codecs/external.ts b/src/cramFile/codecs/external.ts index 621481c5..0e44a55a 100644 --- a/src/cramFile/codecs/external.ts +++ b/src/cramFile/codecs/external.ts @@ -39,13 +39,9 @@ export default class ExternalCodec extends CramCodec< ) { const { blockContentId } = this.parameters const contentBlock = blocksByContentId[blockContentId] - if (!contentBlock) { - throw new CramMalformedError( - `no block found with content ID ${blockContentId}}`, - ) - } + const cursor = cursors.externalBlocks.getCursor(blockContentId) - return this._decodeData(contentBlock, cursor) + return contentBlock ? this._decodeData(contentBlock, cursor) : undefined } _decodeInt(contentBlock: CramFileBlock, cursor: Cursor) { diff --git a/src/cramFile/container/index.ts b/src/cramFile/container/index.ts index 36614b11..c11c218b 100644 --- a/src/cramFile/container/index.ts +++ b/src/cramFile/container/index.ts @@ -21,7 +21,7 @@ export default class CramContainer { // if there are no records in the container, there will be no compression // header - if (!containerHeader?.numRecords) { + if (!containerHeader.numRecords) { return null } const { majorVersion } = await this.file.getDefinition() @@ -51,9 +51,6 @@ export default class CramContainer { async getFirstBlock() { const containerHeader = await this.getHeader() - if (!containerHeader) { - return undefined - } return this.file.readBlock(containerHeader._endPosition) } @@ -78,12 +75,6 @@ export default class CramContainer { const { majorVersion } = await this.file.getDefinition() const sectionParsers = getSectionParsers(majorVersion) const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers - const { size: fileSize } = await this.file.stat() - - if (position >= fileSize) { - console.warn(`pos:${position}>=fileSize:${fileSize} in cram container`) - return undefined - } // parse the container header. do it in 2 pieces because you cannot tell // how much to buffer until you read numLandmarks @@ -93,13 +84,6 @@ export default class CramContainer { ) const header1 = parseItem(bytes1, cramContainerHeader1.parser) const numLandmarksSize = itf8Size(header1.numLandmarks) - if (position + header1.length >= fileSize) { - // header indicates container goes beyond fileSize - console.warn( - `container at ${position} is beyond fileSize:${fileSize}, skipping`, - ) - return undefined - } const bytes2 = await this.file.read( cramContainerHeader2.maxLength(header1.numLandmarks), @@ -116,12 +100,12 @@ export default class CramContainer { ) } - const completeHeader = Object.assign(header1, header2, { + return { + ...header1, + ...header2, _size: header1._size + header2._size - numLandmarksSize, _endPosition: header1._size + header2._size - numLandmarksSize + position, - }) - - return completeHeader + } } } diff --git a/src/cramFile/file.ts b/src/cramFile/file.ts index a3089f32..73077269 100644 --- a/src/cramFile/file.ts +++ b/src/cramFile/file.ts @@ -102,11 +102,6 @@ export default class CramFile { } } - // can just stat this object like a filehandle - stat() { - return this.file.stat() - } - // can just stat this object like a filehandle read(length: number, position: number) { return this.file.read(length, position) @@ -158,7 +153,6 @@ export default class CramFile { const { majorVersion } = await this.getDefinition() const sectionParsers = getSectionParsers(majorVersion) let position = sectionParsers.cramFileDefinition.maxLength - const { size: fileSize } = await this.file.stat() const { cramContainerHeader1 } = sectionParsers // skip with a series of reads to the proper container @@ -166,17 +160,13 @@ export default class CramFile { for (let i = 0; i <= containerNumber; i++) { // if we are about to go off the end of the file // and have not found that container, it does not exist - if (position + cramContainerHeader1.maxLength + 8 >= fileSize) { - return undefined - } + // if (position + cramContainerHeader1.maxLength + 8 >= fileSize) { + // return undefined + // } currentContainer = this.getContainerAtPosition(position) const currentHeader = await currentContainer.getHeader() - if (!currentHeader) { - throw new CramMalformedError( - `container ${containerNumber} not found in file`, - ) - } + // if this is the first container, read all the blocks in the container // to determine its length, because we cannot trust the container // header's given length due to a bug somewhere in htslib @@ -219,39 +209,44 @@ export default class CramFile { /** * @returns {Promise[number]} the number of containers in the file + * + * note: this is currently used only in unit tests, and after removing file + * length check, relies on a try catch to read return an error to break */ async containerCount(): Promise { const { majorVersion } = await this.getDefinition() const sectionParsers = getSectionParsers(majorVersion) - const { size: fileSize } = await this.file.stat() - const { cramContainerHeader1 } = sectionParsers let containerCount = 0 let position = sectionParsers.cramFileDefinition.maxLength - while (position + cramContainerHeader1.maxLength + 8 < fileSize) { - const currentHeader = - await this.getContainerAtPosition(position).getHeader() - if (!currentHeader) { - break - } - // if this is the first container, read all the blocks in the container, - // because we cannot trust the container header's given length due to a - // bug somewhere in htslib - if (containerCount === 0) { - position = currentHeader._endPosition - for (let j = 0; j < currentHeader.numBlocks; j++) { - const block = await this.readBlock(position) - if (block === undefined) { - return undefined + try { + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + while (true) { + const currentHeader = + await this.getContainerAtPosition(position).getHeader() + + // if this is the first container, read all the blocks in the container, + // because we cannot trust the container header's given length due to a + // bug somewhere in htslib + if (containerCount === 0) { + position = currentHeader._endPosition + for (let j = 0; j < currentHeader.numBlocks; j++) { + const block = await this.readBlock(position) + if (block === undefined) { + break + } + position = block._endPosition } - position = block._endPosition + } else { + // otherwise, just traverse to the next container using the container's + // length + position += currentHeader._size + currentHeader.length } - } else { - // otherwise, just traverse to the next container using the container's - // length - position += currentHeader._size + currentHeader.length + containerCount += 1 } - containerCount += 1 + } catch (e) { + containerCount-- + /* do nothing */ } return containerCount diff --git a/src/cramFile/record.ts b/src/cramFile/record.ts index 3af32429..2cd3d271 100644 --- a/src/cramFile/record.ts +++ b/src/cramFile/record.ts @@ -271,7 +271,7 @@ export default class CramRecord { this.readGroupId = readGroupId this.readName = readName - this.sequenceId = sequenceId + this.sequenceId = sequenceId! this.uniqueId = uniqueId this.templateSize = templateSize this.alignmentStart = alignmentStart diff --git a/src/cramFile/slice/decodeRecord.ts b/src/cramFile/slice/decodeRecord.ts index b88435df..b049afa0 100644 --- a/src/cramFile/slice/decodeRecord.ts +++ b/src/cramFile/slice/decodeRecord.ts @@ -211,7 +211,7 @@ function decodeReadFeatures( export type DataSeriesDecoder = ( dataSeriesName: T, -) => DataTypeMapping[DataSeriesTypes[T]] +) => DataTypeMapping[DataSeriesTypes[T]] | undefined export default function decodeRecord( slice: CramSlice, @@ -224,12 +224,11 @@ export default function decodeRecord( majorVersion: number, recordNumber: number, ) { - let flags = decodeDataSeries('BF') + let flags = decodeDataSeries('BF')! - // note: the C data type of compressionFlags is byte in cram v1 - // and int32 in cram v2+, but that does not matter for us here - // in javascript land. - const cramFlags = decodeDataSeries('CF') + // note: the C data type of compressionFlags is byte in cram v1 and int32 in + // cram v2+, but that does not matter for us here in javascript land. + const cramFlags = decodeDataSeries('CF')! if (!isMappedSliceHeader(sliceHeader.parsedContent)) { throw new Error('slice header not mapped') @@ -240,18 +239,18 @@ export default function decodeRecord( ? decodeDataSeries('RI') : sliceHeader.parsedContent.refSeqId - const readLength = decodeDataSeries('RL') + const readLength = decodeDataSeries('RL')! // if APDelta, will calculate the true start in a second pass - let alignmentStart = decodeDataSeries('AP') + let alignmentStart = decodeDataSeries('AP')! if (compressionScheme.APdelta) { alignmentStart = alignmentStart + cursors.lastAlignmentStart } cursors.lastAlignmentStart = alignmentStart - const readGroupId = decodeDataSeries('RG') + const readGroupId = decodeDataSeries('RG')! let readName: string | undefined if (compressionScheme.readNamesIncluded) { - readName = readNullTerminatedString(decodeDataSeries('RN')) + readName = readNullTerminatedString(decodeDataSeries('RN')!) } let mateToUse: @@ -268,14 +267,14 @@ export default function decodeRecord( if (CramFlagsDecoder.isDetached(cramFlags)) { // note: the MF is a byte in 1.0, int32 in 2+, but once again this doesn't // matter for javascript - const mateFlags = decodeDataSeries('MF') + const mateFlags = decodeDataSeries('MF')! let mateReadName: string | undefined if (!compressionScheme.readNamesIncluded) { - mateReadName = readNullTerminatedString(decodeDataSeries('RN')) + mateReadName = readNullTerminatedString(decodeDataSeries('RN')!) readName = mateReadName } - const mateSequenceId = decodeDataSeries('NS') - const mateAlignmentStart = decodeDataSeries('NP') + const mateSequenceId = decodeDataSeries('NS')! + const mateAlignmentStart = decodeDataSeries('NP')! if (mateFlags || mateSequenceId > -1) { mateToUse = { mateFlags, @@ -285,7 +284,7 @@ export default function decodeRecord( } } - templateSize = decodeDataSeries('TS') + templateSize = decodeDataSeries('TS')! // set mate unmapped if needed if (MateFlagsDecoder.isUnmapped(mateFlags)) { @@ -298,12 +297,12 @@ export default function decodeRecord( // detachedCount++ } else if (CramFlagsDecoder.isWithMateDownstream(cramFlags)) { - mateRecordNumber = decodeDataSeries('NF') + recordNumber + 1 + mateRecordNumber = decodeDataSeries('NF')! + recordNumber + 1 } // TODO: the aux tag parsing will have to be refactored if we want to support // cram v1 - const TLindex = decodeDataSeries('TL') + const TLindex = decodeDataSeries('TL')! if (TLindex < 0) { /* TODO: check nTL: TLindex >= compressionHeader.tagEncoding.size */ throw new CramMalformedError('invalid TL index') @@ -322,7 +321,11 @@ export default function decodeRecord( .getCodecForTag(tagId) .decode(slice, coreDataBlock, blocksByContentId, cursors) tags[tagName] = - typeof tagData === 'number' ? tagData : parseTagData(tagType, tagData) + tagData === undefined + ? undefined + : typeof tagData === 'number' + ? tagData + : parseTagData(tagType, tagData) } let readFeatures: ReadFeature[] | undefined @@ -332,7 +335,7 @@ export default function decodeRecord( let readBases = undefined if (!BamFlagsDecoder.isSegmentUnmapped(flags)) { // reading read features - const readFeatureCount = decodeDataSeries('FN') + const readFeatureCount = decodeDataSeries('FN')! if (readFeatureCount) { readFeatures = decodeReadFeatures( alignmentStart, @@ -367,11 +370,11 @@ export default function decodeRecord( } // mapping quality - mappingQuality = decodeDataSeries('MQ') + mappingQuality = decodeDataSeries('MQ')! if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) { qualityScores = new Array(readLength) for (let i = 0; i < qualityScores.length; i++) { - qualityScores[i] = decodeDataSeries('QS') + qualityScores[i] = decodeDataSeries('QS')! } } } else if (CramFlagsDecoder.isDecodeSequenceAsStar(cramFlags)) { @@ -380,14 +383,14 @@ export default function decodeRecord( } else { const bases = new Array(readLength) as number[] for (let i = 0; i < bases.length; i++) { - bases[i] = decodeDataSeries('BA') + bases[i] = decodeDataSeries('BA')! } readBases = String.fromCharCode(...bases) if (CramFlagsDecoder.isPreservingQualityScores(cramFlags)) { qualityScores = new Array(readLength) for (let i = 0; i < bases.length; i++) { - qualityScores[i] = decodeDataSeries('QS') + qualityScores[i] = decodeDataSeries('QS')! } } } diff --git a/src/cramFile/slice/index.ts b/src/cramFile/slice/index.ts index 85da52b2..4c01cfb2 100644 --- a/src/cramFile/slice/index.ts +++ b/src/cramFile/slice/index.ts @@ -20,6 +20,13 @@ export type SliceHeader = CramFileBlock & { parsedContent: MappedSliceHeader | UnmappedSliceHeader } +interface RefRegion { + id: number + start: number + end: number + seq: string | null +} + /** * @private * Try to estimate the template length from a bunch of interrelated multi-segment reads. @@ -233,7 +240,7 @@ export default class CramSlice { for (let i = 0; i < blocks.length; i++) { const block = await this.file.readBlock(blockPosition) if (block === undefined) { - throw new Error('block undefined') + continue } blocks[i] = block blockPosition = blocks[i]!._endPosition @@ -404,21 +411,14 @@ export default class CramSlice { T extends DataSeriesEncodingKey, >( dataSeriesName: T, - ): DataTypeMapping[DataSeriesTypes[T]] => { + ): DataTypeMapping[DataSeriesTypes[T]] | undefined => { const codec = compressionScheme.getCodecForDataSeries(dataSeriesName) if (!codec) { throw new CramMalformedError( `no codec defined for ${dataSeriesName} data series`, ) } - // console.log(dataSeriesName, Object.getPrototypeOf(codec)) - const decoded = codec.decode( - this, - coreDataBlock, - blocksByContentId, - cursors, - ) - return decoded + return codec.decode(this, coreDataBlock, blocksByContentId, cursors) } const records: CramRecord[] = new Array( sliceHeader.parsedContent.numRecords, @@ -457,16 +457,22 @@ export default class CramSlice { } // interpret `recordsToNextFragment` attributes to make standard `mate` - // objects Resolve mate pair cross-references between records in this slice + // objects + // + // Resolve mate pair cross-references between records in this slice for (let i = 0; i < records.length; i += 1) { - const { mateRecordNumber } = records[i]! - if (mateRecordNumber !== undefined && mateRecordNumber >= 0) { - associateIntraSliceMate( - records, - i, - records[i]!, - records[mateRecordNumber]!, - ) + const r = records[i] + // check for !!r added after removal of "stat" file size check: found + // some undefined entries + if (r) { + const { mateRecordNumber } = r + if ( + mateRecordNumber !== undefined && + mateRecordNumber >= 0 && + records[mateRecordNumber] + ) { + associateIntraSliceMate(records, i, r, records[mateRecordNumber]) + } } } @@ -501,10 +507,7 @@ export default class CramSlice { if (compressionScheme === undefined) { throw new Error('compression scheme undefined') } - const refRegions: Record< - string, - { id: number; start: number; end: number; seq: string | null } - > = {} + const refRegions: Record = {} // iterate over the records to find the spans of the reference // sequences we need to fetch