Skip to content

Commit

Permalink
optimize timestamp detection
Browse files Browse the repository at this point in the history
  • Loading branch information
def committed Oct 30, 2023
1 parent b8fd9a7 commit bbcd7a7
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 29 deletions.
40 changes: 18 additions & 22 deletions timestamp.go
Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
package logparser

import (
"regexp"
)

const (
// lookForTimestampLimit is the maximum number of leading bytes of a line
// that containsTimestamp inspects; longer lines are truncated to this
// length before scanning.
lookForTimestampLimit = 100
)

var (
// timestampRegexes lists the patterns that are tried in order against a
// line; a match of any single pattern is treated as "line has a timestamp".
// All patterns are compiled once at package initialization.
timestampRegexes = []*regexp.Regexp{
// Time of day: "DD:DD" at the start of the line or after whitespace,
// optionally followed by ":DD" and any run of characters that are not
// whitespace or quotes (fractional seconds, zone offsets, ...).
regexp.MustCompile(`(^|\s)\d{2}:\d{2}(:\d{2}[^\s"']*)?`),
// Two digits, a capitalized three-letter word, four digits: "02 Jan 2006".
regexp.MustCompile(`\d{2} [A-Z][a-z]{2} \d{4}`),
// Four-digit group first, separated by '-', '/' or '.': "2006-01-02".
regexp.MustCompile(`\d{4}-\d{2}-\d{2}`),
regexp.MustCompile(`\d{4}/\d{2}/\d{2}`),
regexp.MustCompile(`\d{4}\.\d{2}\.\d{2}`),
// Capitalized three-letter word followed by two digits: "Jan 02".
regexp.MustCompile(`[A-Z][a-z]{2} \d{2}`),
// Four-digit group last, separated by '-', '/' or '.': "02-01-2006".
regexp.MustCompile(`\d{2}-\d{2}-\d{4}`),
regexp.MustCompile(`\d{2}/\d{2}/\d{4}`),
regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`),
// Two digits, three-letter word, four digits joined by '/': "02/Jan/2006".
regexp.MustCompile(`\d{2}/[A-Z][a-z]{2}/\d{4}`),
}
)

func containsTimestamp(line string) bool {
if len(line) > lookForTimestampLimit {
line = line[:lookForTimestampLimit]
}
for _, re := range timestampRegexes {
if re.MatchString(line) {
return true
var digits, colons int
for _, r := range line {
switch {
case r >= '0' && r <= '9':
digits++
if digits > 2 {
digits = 0
}
if digits == 2 && colons == 2 {
return true
}
case r == ':':
if digits == 2 {
colons++
}
digits = 0
default:
digits, colons = 0, 0
}
}
return false
Expand Down
33 changes: 26 additions & 7 deletions timestamp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,10 @@ package logparser
import (
"github.com/stretchr/testify/assert"
"testing"
"time"
)

func Test_containsTimestamp(t *testing.T) {
assert.True(t, containsTimestamp("2005-08-09"))
assert.True(t, containsTimestamp("2020/06/26"))
assert.True(t, containsTimestamp("02/17/2009"))
assert.True(t, containsTimestamp("25.02.2013"))
assert.True(t, containsTimestamp("2013.25.02"))
assert.True(t, containsTimestamp("18:31"))
assert.True(t, containsTimestamp("18:31:42"))
assert.True(t, containsTimestamp("18:31:42+03"))
assert.True(t, containsTimestamp("18:31:42-03"))
assert.True(t, containsTimestamp("18:31:42+03:30"))
Expand All @@ -25,4 +19,29 @@ func Test_containsTimestamp(t *testing.T) {
assert.True(t, containsTimestamp("2005-08-09T18:31:42"))
assert.True(t, containsTimestamp("2005-08-09T18:31:42.201"))
assert.True(t, containsTimestamp(`10/Oct/2000:13:55:36 -0700`))
assert.True(t, containsTimestamp(time.ANSIC))
assert.True(t, containsTimestamp(time.UnixDate))
assert.True(t, containsTimestamp(time.RubyDate))
assert.True(t, containsTimestamp(time.RFC850))
assert.True(t, containsTimestamp(time.RFC1123))
assert.True(t, containsTimestamp(time.RFC1123Z))
assert.True(t, containsTimestamp(time.RFC3339))
assert.True(t, containsTimestamp(time.RFC3339Nano))
assert.True(t, containsTimestamp(time.Stamp))
assert.True(t, containsTimestamp(time.StampMilli))
assert.True(t, containsTimestamp(time.StampMicro))

assert.False(t, containsTimestamp("13/32"))
assert.False(t, containsTimestamp("13:32"))
assert.False(t, containsTimestamp("100/5/100"))
assert.False(t, containsTimestamp("1:12:123"))
assert.False(t, containsTimestamp("12:aa:12:32"))

}

// BenchSinkContainsTimestamp receives the benchmark's result so the compiler
// cannot treat the containsTimestamp call as dead code and optimize it away.
var BenchSinkContainsTimestamp bool

// Benchmark_containsTimestamp measures containsTimestamp on a single
// access-log style line whose timestamp sits inside square brackets.
func Benchmark_containsTimestamp(b *testing.B) {
	const line = `10.42.0.21 - - [30/Oct/2023:11:55:47 +0000] "GET / HTTP/1.1" 200 612 "-" "-" "-"`

	var found bool
	for n := 0; n < b.N; n++ {
		found = containsTimestamp(line)
	}
	// Publish the last result outside the loop so the timed region stays
	// limited to the call under test.
	BenchSinkContainsTimestamp = found
}

0 comments on commit bbcd7a7

Please sign in to comment.