Skip to content

Commit

Permalink
catch scanner errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
thomasjungblut committed Sep 15, 2021
1 parent 8945ae7 commit b83e8ca
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 63 deletions.
80 changes: 42 additions & 38 deletions recordio/proto/recordio_proto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package proto
import (
"bufio"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/thomasjungblut/go-sstables/recordio"
"github.com/thomasjungblut/go-sstables/recordio/test_files"
"io/ioutil"
Expand All @@ -15,110 +16,112 @@ const TestFile = "../test_files/berlin52.tsp"

// TestReadWriteEndToEndProto runs the sequential protobuf write/read round
// trip against a fresh temp file, using a writer created with only the File
// option.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEndProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWriteProtobuf(writer, t, tmpFile)
}

// TestReadWriteEndToEndGzipProto runs the sequential protobuf write/read round
// trip against a fresh temp file, with the writer configured for
// recordio.CompressionTypeGZIP.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEndGzipProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndGzipProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile), CompressionType(recordio.CompressionTypeGZIP))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWriteProtobuf(writer, t, tmpFile)
}

// TestReadWriteEndToEndSnappyProto runs the sequential protobuf write/read
// round trip against a fresh temp file, with the writer configured for
// recordio.CompressionTypeSnappy.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEndSnappyProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndSnappyProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile), CompressionType(recordio.CompressionTypeSnappy))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWriteProtobuf(writer, t, tmpFile)
}

// endToEndReadWriteProtobuf is the shared body of the sequential protobuf
// round-trip tests. It writes every line of TestFile through writer as a
// test_files.TextLine (LineNumber = zero-based line index, Line = line text),
// then reopens tmpFile by name with NewProtoReaderWithPath and checks that the
// records come back in the same order with the same content. Both passes
// expect exactly 59 lines.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func endToEndReadWriteProtobuf(writer WriterI, t *testing.T, tmpFile *os.File) {
// Read the input file line by line, write each line out, then read
// everything back and assert that the content is unchanged.
inFile, err := os.Open(TestFile)
assert.Nil(t, err)
assert.Nil(t, writer.Open())
require.NoError(t, err)
require.NoError(t, writer.Open())

numRead := 0
scanner := bufio.NewScanner(inFile)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
msg := test_files.TextLine{LineNumber: int32(numRead), Line: scanner.Text()}
_, err = writer.Write(&msg)
assert.Nil(t, err)
require.NoError(t, err)
numRead++
}
// Scan returns false both at EOF and on a read error, so check Err to tell
// them apart.
require.NoError(t, scanner.Err())
assert.Equal(t, 59, numRead)
assert.Nil(t, writer.Close())
assert.Nil(t, inFile.Close())
require.NoError(t, writer.Close())
require.NoError(t, inFile.Close())

// Read phase: open the written file by path with a new reader.
reader, err := NewProtoReaderWithPath(tmpFile.Name())
assert.Nil(t, err)
assert.Nil(t, reader.Open())
require.NoError(t, err)
require.NoError(t, reader.Open())

// Re-scan the input file so each record can be compared with its source line.
inFile, err = os.Open(TestFile)
assert.Nil(t, err)
require.NoError(t, err)
scanner = bufio.NewScanner(inFile)
scanner.Split(bufio.ScanLines)
numRead = 0
for scanner.Scan() {
textLine := &test_files.TextLine{}
_, err := reader.ReadNext(textLine)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, numRead, int(textLine.LineNumber))
assert.Equal(t, scanner.Text(), textLine.Line)
numRead++
}
// As above: surface any scanner error that ended the loop early.
require.NoError(t, scanner.Err())
assert.Equal(t, 59, numRead)
assert.Nil(t, reader.Close())
assert.Nil(t, inFile.Close())
require.NoError(t, reader.Close())
require.NoError(t, inFile.Close())
}

// TestRandomReadWriteEndToEndProto runs the random-access protobuf round trip
// against a fresh temp file, using a writer created with only the File option.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestRandomReadWriteEndToEndProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile))
assert.Nil(t, err)
require.NoError(t, err)

endToEndRandomReadWriteProtobuf(writer, t, tmpFile)
}

// TestRandomReadWriteEndToEndGzipProto runs the random-access protobuf round
// trip against a fresh temp file, with the writer configured for
// recordio.CompressionTypeGZIP.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestRandomReadWriteEndToEndGzipProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndGzipProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile), CompressionType(recordio.CompressionTypeGZIP))
assert.Nil(t, err)
require.NoError(t, err)

endToEndRandomReadWriteProtobuf(writer, t, tmpFile)
}

// TestRandomReadWriteEndToEndSnappyProto runs the random-access protobuf round
// trip against a fresh temp file, with the writer configured for
// recordio.CompressionTypeSnappy.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestRandomReadWriteEndToEndSnappyProto(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndSnappyProto")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewWriter(File(tmpFile), CompressionType(recordio.CompressionTypeSnappy))
assert.Nil(t, err)
require.NoError(t, err)

endToEndRandomReadWriteProtobuf(writer, t, tmpFile)
}

// endToEndRandomReadWriteProtobuf is the shared body of the random-access
// protobuf round-trip tests. It writes records through writer while recording
// the offset returned by each Write (keyed by line text in offsetMap), then
// opens tmpFile by name with NewMMapProtoReaderWithPath and reads records back
// at those offsets via ReadNextAt after shuffling the lines.
// NOTE(review): this text is a rendered diff without +/- markers, and the
// "Expand All" markers stand for lines the diff view did not show (presumably
// the scanner setup, the loop headers and the shuffle swap) - consult the full
// file for those parts. Each assert.Nil line appears to be the pre-commit form
// of the require.NoError line that follows it.
func endToEndRandomReadWriteProtobuf(writer WriterI, t *testing.T, tmpFile *os.File) {
// same idea as above, but we're testing the random read via mmap
inFile, err := os.Open(TestFile)
assert.Nil(t, err)
assert.Nil(t, writer.Open())
require.NoError(t, err)
require.NoError(t, writer.Open())

// lines keeps the written lines; offsetMap maps each line's text to the
// offset that Write returned for it.
var lines []string
offsetMap := make(map[string]uint64)
Expand All @@ -131,16 +134,17 @@ func endToEndRandomReadWriteProtobuf(writer WriterI, t *testing.T, tmpFile *os.F
offset, err := writer.Write(&msg)
offsetMap[line] = offset
lines = append(lines, line)
assert.Nil(t, err)
require.NoError(t, err)
numRead++
}
// Surface any scanner error that ended the loop early.
require.NoError(t, scanner.Err())
assert.Equal(t, 59, numRead)
assert.Nil(t, writer.Close())
assert.Nil(t, inFile.Close())
require.NoError(t, writer.Close())
require.NoError(t, inFile.Close())

reader, err := NewMMapProtoReaderWithPath(tmpFile.Name())
assert.Nil(t, err)
assert.Nil(t, reader.Open())
require.NoError(t, err)
require.NoError(t, reader.Open())

// we shuffle the lines, so we can test the actual random read behaviour
rand.Shuffle(len(lines), func(i, j int) {
Expand All @@ -152,10 +156,10 @@ func endToEndRandomReadWriteProtobuf(writer WriterI, t *testing.T, tmpFile *os.F
// Look up the offset recorded for this line and read the record stored there.
offset := offsetMap[s]
textLine := &test_files.TextLine{}
_, err := reader.ReadNextAt(textLine, offset)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, s, textLine.Line)
numRead++
}
assert.Equal(t, 59, numRead)
assert.Nil(t, reader.Close())
require.NoError(t, reader.Close())
}
53 changes: 28 additions & 25 deletions recordio/recordio_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package recordio
import (
"bufio"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"io/ioutil"
"os"
"testing"
Expand All @@ -12,88 +13,90 @@ import (

// TestReadWriteEndToEnd runs the raw-bytes write/read round trip against a
// fresh temp file, using a file writer created with only the File option.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEnd(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEnd")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewFileWriter(File(tmpFile))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWrite(writer, t, tmpFile)
}

// TestReadWriteEndToEndGzip runs the raw-bytes write/read round trip against a
// fresh temp file, with the file writer configured for CompressionTypeGZIP.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEndGzip(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndGzip")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewFileWriter(File(tmpFile), CompressionType(CompressionTypeGZIP))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWrite(writer, t, tmpFile)
}

// TestReadWriteEndToEndSnappy runs the raw-bytes write/read round trip against
// a fresh temp file, with the file writer configured for CompressionTypeSnappy.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func TestReadWriteEndToEndSnappy(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "recordio_EndToEndSnappy")
assert.Nil(t, err)
defer func() { assert.Nil(t, os.Remove(tmpFile.Name())) }()
require.NoError(t, err)
// Remove the temp file once the test function returns.
defer func() { require.NoError(t, os.Remove(tmpFile.Name())) }()
writer, err := NewFileWriter(File(tmpFile), CompressionType(CompressionTypeSnappy))
assert.Nil(t, err)
require.NoError(t, err)

endToEndReadWrite(writer, t, tmpFile)
}

// endToEndReadWrite is the shared body of the raw-bytes round-trip tests. It
// writes every line of test_files/berlin52.tsp through writer as a byte slice,
// then reopens tmpFile by name with NewFileReaderWithPath and checks that the
// records come back in the same order with the same content. Both passes
// expect exactly 59 lines.
// NOTE(review): this text is a rendered diff without +/- markers; each
// assert.Nil line appears to be the pre-commit form of the require.NoError
// line that follows it - confirm against the commit.
func endToEndReadWrite(writer WriterI, t *testing.T, tmpFile *os.File) {
// Read the input file line by line, write each line out, then read
// everything back and assert that the content is unchanged.
inFile, err := os.Open("test_files/berlin52.tsp")
assert.Nil(t, err)
assert.Nil(t, writer.Open())
require.NoError(t, err)
require.NoError(t, writer.Open())

numRead := 0
scanner := bufio.NewScanner(inFile)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
_, err = writer.Write([]byte(scanner.Text()))
assert.Nil(t, err)
require.NoError(t, err)
numRead++
}
// Scan returns false both at EOF and on a read error, so check Err to tell
// them apart.
require.NoError(t, scanner.Err())
assert.Equal(t, 59, numRead)
assert.Nil(t, writer.Close())
assert.Nil(t, inFile.Close())
require.NoError(t, writer.Close())
require.NoError(t, inFile.Close())

// Read phase: open the written file by path with a new reader.
reader, err := NewFileReaderWithPath(tmpFile.Name())
assert.Nil(t, err)
assert.Nil(t, reader.Open())
require.NoError(t, err)
require.NoError(t, reader.Open())

// Re-scan the input file so each record can be compared with its source line.
inFile, err = os.Open("test_files/berlin52.tsp")
assert.Nil(t, err)
require.NoError(t, err)
scanner = bufio.NewScanner(inFile)
scanner.Split(bufio.ScanLines)
numRead = 0
for scanner.Scan() {
bytes, err := reader.ReadNext()
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, scanner.Text(), string(bytes))
numRead++
}
// As above: surface any scanner error that ended the loop early.
require.NoError(t, scanner.Err())
assert.Equal(t, 59, numRead)
assert.Nil(t, reader.Close())
assert.Nil(t, inFile.Close())
require.NoError(t, reader.Close())
require.NoError(t, inFile.Close())
}

// closeFileWriter closes writer and reports a test failure on t if Close
// returns an error.
// NOTE(review): rendered diff without +/- markers; the assert.Nil line appears
// to be the pre-commit form of the require.NoError line below it.
func closeFileWriter(t *testing.T, writer *FileWriter) {
func() { assert.Nil(t, writer.Close()) }()
func() { require.NoError(t, writer.Close()) }()
}

// closeOpenClosable closes oc and reports a test failure on t if Close returns
// an error.
// NOTE(review): rendered diff without +/- markers; the assert.Nil line appears
// to be the pre-commit form of the require.NoError line below it.
func closeOpenClosable(t *testing.T, oc OpenClosableI) {
func() { assert.Nil(t, oc.Close()) }()
func() { require.NoError(t, oc.Close()) }()
}

// closeFileReader closes reader and reports a test failure on t if Close
// returns an error.
// NOTE(review): rendered diff without +/- markers; the assert.Nil line appears
// to be the pre-commit form of the require.NoError line below it.
func closeFileReader(t *testing.T, reader *FileReader) {
func() { assert.Nil(t, reader.Close()) }()
func() { require.NoError(t, reader.Close()) }()
}

// closeMMapReader closes reader and reports a test failure on t if Close
// returns an error.
// NOTE(review): rendered diff without +/- markers; the assert.Nil line appears
// to be the pre-commit form of the require.NoError line below it.
func closeMMapReader(t *testing.T, reader *MMapReader) {
func() { assert.Nil(t, reader.Close()) }()
func() { require.NoError(t, reader.Close()) }()
}

// removeFileWriterFile deletes the file behind writer (looked up through
// writer.file.Name()) and reports a test failure on t if os.Remove returns an
// error.
// NOTE(review): rendered diff without +/- markers; the assert.Nil line appears
// to be the pre-commit form of the require.NoError line below it.
func removeFileWriterFile(t *testing.T, writer *FileWriter) {
func() { assert.Nil(t, os.Remove(writer.file.Name())) }()
func() { require.NoError(t, os.Remove(writer.file.Name())) }()
}

0 comments on commit b83e8ca

Please sign in to comment.