Skip to content

Commit

Permalink
Gf2
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Dec 11, 2024
1 parent 8a312e0 commit b15bbae
Show file tree
Hide file tree
Showing 9 changed files with 574 additions and 620 deletions.
17 changes: 10 additions & 7 deletions src/bai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ export default class BAI extends IndexFile {

// fetch and parse the index
async _parse(opts?: BaseOpts) {
const bytes = (await this.filehandle.readFile(opts)) as Buffer
const bytes = await this.filehandle.readFile()
const dataView = new DataView(bytes.buffer)

// check BAI magic numbers
if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
if (dataView.getUint32(0, true) !== BAI_MAGIC) {
throw new Error('Not a BAI file')
}

const refCount = bytes.readInt32LE(4)
const refCount = dataView.getInt32(4, true)
const depth = 5
const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7

Expand All @@ -57,16 +58,18 @@ export default class BAI extends IndexFile {
linearIndex: LinearIndex
stats?: { lineCount: number }
}>(refCount)

for (let i = 0; i < refCount; i++) {
// the binning index
const binCount = bytes.readInt32LE(curr)

const binCount = dataView.getInt32(curr, true)
let stats

curr += 4
const binIndex: Record<number, Chunk[]> = {}

for (let j = 0; j < binCount; j += 1) {
const bin = bytes.readUInt32LE(curr)
const bin = dataView.getUint32(curr, true)
curr += 4
if (bin === binLimit + 1) {
curr += 4
Expand All @@ -75,7 +78,7 @@ export default class BAI extends IndexFile {
} else if (bin > binLimit + 1) {
throw new Error('bai index contains too many bins, please use CSI')
} else {
const chunkCount = bytes.readInt32LE(curr)
const chunkCount = dataView.getInt32(curr, true)
curr += 4
const chunks = new Array<Chunk>(chunkCount)
for (let k = 0; k < chunkCount; k++) {
Expand All @@ -90,7 +93,7 @@ export default class BAI extends IndexFile {
}
}

const linearCount = bytes.readInt32LE(curr)
const linearCount = dataView.getInt32(curr, true)
curr += 4
// as we're going through the linear index, figure out the smallest
// virtual offset in the indexes, which tells us where the BAM header
Expand Down
62 changes: 22 additions & 40 deletions src/bamFile.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Buffer } from 'buffer'
import crc32 from 'crc/crc32'
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
Expand Down Expand Up @@ -148,23 +147,22 @@ export default class BamFile {
let buffer
if (ret) {
const s = ret + blockLen
const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts)
if (!res.bytesRead) {
throw new Error('Error reading header')
}
buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret))
buffer = await this.bam.read(s, 0)
} else {
buffer = await this.bam.readFile(opts)
}

console.log({ buffer })
const uncba = await unzip(buffer)
const dataView = new DataView(uncba.buffer)

if (uncba.readInt32LE(0) !== BAM_MAGIC) {
if (dataView.getInt32(0, true) !== BAM_MAGIC) {
throw new Error('Not a BAM file')
}
const headLen = uncba.readInt32LE(4)
const headLen = dataView.getInt32(4, true)

this.header = uncba.toString('utf8', 8, 8 + headLen)
const decoder = new TextDecoder('utf8')
this.header = decoder.decode(uncba.subarray(8, 8 + headLen))
const { chrToIndex, indexToChr } = await this._readRefSeqs(
headLen + 8,
65535,
Expand Down Expand Up @@ -204,30 +202,21 @@ export default class BamFile {
if (start > refSeqBytes) {
return this._readRefSeqs(start, refSeqBytes * 2, opts)
}
const size = refSeqBytes + blockLen
const { bytesRead, buffer } = await this.bam.read(
Buffer.alloc(size),
0,
refSeqBytes,
0,
opts,
)
if (!bytesRead) {
throw new Error('Error reading refseqs from header')
}
const uncba = await unzip(
buffer.subarray(0, Math.min(bytesRead, refSeqBytes)),
)
const nRef = uncba.readInt32LE(start)
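// TODO: decide whether to request `refSeqBytes + blockLen` bytes here (const size = refSeqBytes + blockLen); the previous code allocated a buffer of that size but still read only `refSeqBytes` bytes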
const buffer = await this.bam.read(refSeqBytes, 0, opts)
const uncba = await unzip(buffer)
const dataView = new DataView(uncba.buffer)
const nRef = dataView.getInt32(start, true)
let p = start + 4
const chrToIndex: Record<string, number> = {}
const indexToChr: { refName: string; length: number }[] = []
const decoder = new TextDecoder('utf8')
for (let i = 0; i < nRef; i += 1) {
const lName = uncba.readInt32LE(p)
const lName = dataView.getInt32(p, true)
const refName = this.renameRefSeq(
uncba.toString('utf8', p + 4, p + 4 + lName - 1),
decoder.decode(uncba.subarray(p + 4, p + 4 + lName - 1)),
)
const lRef = uncba.readInt32LE(p + lName + 4)
const lRef = dataView.getInt32(p + lName + 4, true)

chrToIndex[refName] = i
indexToChr.push({ refName, length: lRef })
Expand Down Expand Up @@ -388,15 +377,7 @@ export default class BamFile {
}

async _readRegion(position: number, size: number, opts: BaseOpts = {}) {
const { bytesRead, buffer } = await this.bam.read(
Buffer.alloc(size),
0,
size,
position,
opts,
)

return buffer.subarray(0, Math.min(bytesRead, size))
return this.bam.read(size, position, opts)
}

async _readChunk({ chunk, opts }: { chunk: Chunk; opts: BaseOpts }) {
Expand All @@ -415,7 +396,7 @@ export default class BamFile {
}

async readBamFeatures(
ba: Buffer,
ba: Uint8Array,
cpositions: number[],
dpositions: number[],
chunk: Chunk,
Expand All @@ -425,8 +406,9 @@ export default class BamFile {
let pos = 0
let last = +Date.now()

const dataView = new DataView(ba.buffer)
while (blockStart + 4 < ba.length) {
const blockSize = ba.readInt32LE(blockStart)
const blockSize = dataView.getInt32(blockStart, true)
const blockEnd = blockStart + 4 + blockSize - 1

// increment position to the current decompressed status
Expand Down Expand Up @@ -471,8 +453,8 @@ export default class BamFile {
chunk.minv.dataPosition +
1
: // must be slice, not subarray for buffer polyfill on web
// eslint-disable-next-line @typescript-eslint/no-deprecated
crc32.signed(ba.slice(blockStart, blockEnd)),
// @ts-expect-error
crc32.signed(ba.subarray(blockStart, blockEnd)),
})

sink.push(feature)
Expand Down
36 changes: 19 additions & 17 deletions src/csi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ export default class CSI extends IndexFile {
return []
}

parseAuxData(bytes: Buffer, offset: number) {
const formatFlags = bytes.readInt32LE(offset)
parseAuxData(bytes: Uint8Array, offset: number) {
const dataView = new DataView(bytes.buffer)
const formatFlags = dataView.getUint32(offset, true)
const coordinateType =
formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
const format = (
Expand All @@ -48,14 +49,14 @@ export default class CSI extends IndexFile {
throw new Error(`invalid Tabix preset format flags ${formatFlags}`)
}
const columnNumbers = {
ref: bytes.readInt32LE(offset + 4),
start: bytes.readInt32LE(offset + 8),
end: bytes.readInt32LE(offset + 12),
ref: dataView.getInt32(offset + 4, true),
start: dataView.getInt32(offset + 8, true),
end: dataView.getInt32(offset + 12, true),
}
const metaValue = bytes.readInt32LE(offset + 16)
const metaValue = dataView.getInt32(offset + 16, true)
const metaChar = metaValue ? String.fromCharCode(metaValue) : ''
const skipLines = bytes.readInt32LE(offset + 20)
const nameSectionLength = bytes.readInt32LE(offset + 24)
const skipLines = dataView.getInt32(offset + 20, true)
const nameSectionLength = dataView.getInt32(offset + 24, true)

return {
columnNumbers,
Expand All @@ -77,23 +78,24 @@ export default class CSI extends IndexFile {
const buffer = await this.filehandle.readFile(opts)
const bytes = await unzip(buffer)

const dataView = new DataView(bytes.buffer)
let csiVersion
// check CSI magic numbers
if (bytes.readUInt32LE(0) === CSI1_MAGIC) {
if (dataView.getUint32(0, true) === CSI1_MAGIC) {
csiVersion = 1
} else if (bytes.readUInt32LE(0) === CSI2_MAGIC) {
} else if (dataView.getUint32(0, true) === CSI2_MAGIC) {
csiVersion = 2
} else {
throw new Error('Not a CSI file')
// TODO: do we need to support big-endian CSI files?
}

this.minShift = bytes.readInt32LE(4)
this.depth = bytes.readInt32LE(8)
this.minShift = dataView.getInt32(4, true)
this.depth = dataView.getInt32(8, true)
this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7
const auxLength = bytes.readInt32LE(12)
const auxLength = dataView.getInt32(12, true)
const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined
const refCount = bytes.readInt32LE(16 + auxLength)
const refCount = dataView.getInt32(16 + auxLength, true)

type BinIndex = Record<string, Chunk[]>

Expand All @@ -106,20 +108,20 @@ export default class CSI extends IndexFile {
}>(refCount)
for (let i = 0; i < refCount; i++) {
// the binning index
const binCount = bytes.readInt32LE(curr)
const binCount = dataView.getInt32(curr, true)
curr += 4
const binIndex: Record<string, Chunk[]> = {}
let stats // < provided by parsing a pseudo-bin, if present
for (let j = 0; j < binCount; j++) {
const bin = bytes.readUInt32LE(curr)
const bin = dataView.getUint32(curr, true)
curr += 4
if (bin > this.maxBinNumber) {
stats = parsePseudoBin(bytes, curr + 28)
curr += 28 + 16
} else {
firstDataLine = findFirstData(firstDataLine, fromBytes(bytes, curr))
curr += 8
const chunkCount = bytes.readInt32LE(curr)
const chunkCount = dataView.getInt32(curr, true)
curr += 4
const chunks = new Array<Chunk>(chunkCount)
for (let k = 0; k < chunkCount; k += 1) {
Expand Down
Loading

0 comments on commit b15bbae

Please sign in to comment.