Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: fail format validation on unnecessary uppercase character classes #166

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 29 additions & 23 deletions cmd/regex_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"os"
"path"
"path/filepath"
"regexp"
"strings"

"github.com/spf13/cobra"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/coreruleset/crs-toolchain/v2/regex"
"github.com/coreruleset/crs-toolchain/v2/regex/parser"
"github.com/coreruleset/crs-toolchain/v2/regex/processors"
"github.com/coreruleset/crs-toolchain/v2/utils"
)

const (
Expand Down Expand Up @@ -191,7 +193,7 @@ func processFile(filePath string, ctxt *processors.Context, checkOnly bool) erro
}

if !checkStandardHeader(lines) {
logger.Info().Msgf("file %s does not have standard header", filePath)
logger.Info().Msgf("file %s does not have standard header", filename)
// prepend the standard header
lines = append([]string{regexAssemblyStandardHeader}, lines...)
}
Expand All @@ -205,15 +207,15 @@ func processFile(filePath string, ctxt *processors.Context, checkOnly bool) erro
return err
}
// sanity check: if we are using an ignore-case flag, we don't need to have any uppercase letters in the file
foundUppercase, errMessage := findUpperCaseOnIgnoreCaseFlag(lines, raParser.Flags['i'])
foundUppercase, errMessage := findUpperCaseCharacterClassOnIgnoreCaseFlag(lines, raParser.Flags['i'])
if foundUppercase {
logger.Warn().Msgf("File contains uppercase letters, but ignore-case flag is set. Please check your source files.")
logger.Warn().Msgf("%s contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.", filename)
logger.Warn().Msgf("%s", errMessage)
logger.Warn().Msg("Be aware that because of file inclusions and definitions, the actual line number or file might be different.")
}
equalContent := bytes.Equal(currentContents, newContents)
if !equalContent || foundUppercase {
message = formatMessage(fmt.Sprintf("File %s not properly formatted", filePath))
message = formatMessage(fmt.Sprintf("%s not properly formatted", filename))
fmt.Println(message)
processFileError = &UnformattedFileError{filePath: filePath}
}
Expand Down Expand Up @@ -318,9 +320,9 @@ func checkStandardHeader(lines []string) bool {
return false
}

// findUpperCaseOnIgnoreCaseFlag checks if the file contains uppercase letters when the ignore-case flag is set
// findUpperCaseCharacterClassOnIgnoreCaseFlag checks if the file contains uppercase letters when the ignore-case flag is set
// returns true if the file contains uppercase letters, and an error message pointing the line where it was found.
func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) {
func findUpperCaseCharacterClassOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) {
res := false
definition := false
message := ""
Expand All @@ -344,7 +346,7 @@ func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) {
if definition {
// restore the original line
line = lines[i]
index = definitionRegex.FindSubmatchIndex([]byte(line))[3]
index += definitionRegex.FindSubmatchIndex([]byte(line))[3]
}
if index > 0 {
fill = strings.Repeat("=", index)
Expand All @@ -361,24 +363,28 @@ func findUpperCaseOnIgnoreCaseFlag(lines []string, iFlag bool) (bool, string) {
// findUppercaseNonEscaped finds an uppercase character that is not escaped in a given line
// returns true if the line contains an uppercase character that is not escaped, and the index of the character
func findUppercaseNonEscaped(line string) (bool, int) {
for i, c := range line {
if c >= 'A' && c <= 'Z' {
// go back and check if the character is escaped
count := 0
runes := []rune(line[:i]) // Convert string to rune slice
// Iterate over the rune slice in reverse order
for j := len(runes) - 1; j >= 0; j-- {
if runes[j] == '\\' {
// we found a backslash, so we need to check if it is escaped
count++
// if the character is not escaped, return the index
} else {
break
characterClassRegex := regexp.MustCompile(`(?:^|[^\\])\[.+?[^\\]\]`)
matches := characterClassRegex.FindAllStringIndex(line, -1)
if matches == nil {
return false, -1
}

for _, location := range matches {
start := location[0]
end := location[1]
// Skip match of non-backslash character before the first bracket.
// Don't skip if the match is at the start of the string.
if start > 0 {
start += 1
}
match := line[start:end]
for i, c := range match {
if c >= 'A' && c <= 'Z' {
// Escaped upper case letters are probably shortahands like `\W`, `\S`
if !utils.IsEscaped(match, i) {
return true, i + start
}
}
if count%2 == 0 {
return true, i
}
}
}
return false, -1
Expand Down
79 changes: 55 additions & 24 deletions cmd/regex_format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,101 +508,132 @@ this is a regex

_, err := rootCmd.ExecuteC()
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))

s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "{1,3}[Bb]lah\\n======^ [HERE]\\n\"}\n")
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercasePositionIndicator() {
func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_FirstCharacter() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!+ i
First letter is uppercase
[First] letter is uppercase
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "[First] letter is uppercase\\n=^ [HERE]\\n\"}\n")
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_LastCharacter() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "First letter is uppercase\\n^ [HERE]\\n\"}\n")
s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!+ i
Last letter is upper[casE]
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "Last letter is upper[casE]\\n========================^ [HERE]\\n\"}\n")
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercaseEscapedUppercase() {
func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitions() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!> define homer [simpson]
##!+ i
this regex has a \S letter which should not match
multiple escape sequences \S\S\S
multiple escape sequences \A\B\S should be good.
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.NoError(err)
s.Require().NoError(err)
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercaseUnevenlyEscapedUppercase() {
func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitionsWithUppercase() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!> define homer No_[Bueno]
##!+ i
multiple escape sequences \A\B\S should be good.
odd number of escape sequences should be good also \\\A\\\S
even number of escape sequences should be bad \\\\A\\B
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))
s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "even number of escape sequences should be bad")
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "##!> define homer No_[Bueno]\\n======================^ [HERE]")
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitions() {
func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_ComplexCharacterClass() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!> define homer simpson
##!+ i
multiple escape sequences \A\B\S should be good.
I'm complex: [^S$%_+-fG-].
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.Require().NoError(err)
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "I'm complex: [^S$%_+-fG-].\\n===============^ [HERE]")
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_PlusDefinitionsWithUppercase() {
func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_CharacterClassWithBrackets() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!> define homer No_Bueno
##!+ i
multiple escape sequences \A\B\S should be good.
Chara[ct]er class with brackets: [$l\][2R [fl\]iF]
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.EqualError(err, fmt.Sprintf("File not properly formatted: %s", path.Join(s.dataDir, "123456.ra")))
s.Contains(out.String(), "File contains uppercase letters, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), "##!> define homer No_Bueno\\n==================^ [HERE]")
s.Contains(out.String(), "123456.ra contains uppercase letters in character classes, but ignore-case flag is set. Please check your source files.")
s.Contains(out.String(), `Chara[ct]er class with brackets: [$l\\][2R [fl\\]iF]\n========================================^ [HERE]`)
}

func (s *formatTestSuite) TestIgnoreCaseFlagWithUppercase_IgnoreShortHands() {
// send logs to buffer
out := &bytes.Buffer{}
log := zerolog.New(out)
logger = log.With().Str("component", "parser-test").Logger()

s.writeDataFile("123456.ra", regexAssemblyStandardHeader+`
##!+ i
[\W\S]
`)
rootCmd.SetArgs([]string{"-d", s.tempDir, "regex", "format", "-c", "123456", "-o", "github"})

_, err := rootCmd.ExecuteC()
s.Require().NoError(err)
}

func (s *formatTestSuite) writeDataFile(filename string, contents string) {
Expand Down
16 changes: 3 additions & 13 deletions regex/operators/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/coreruleset/crs-toolchain/v2/regex"
"github.com/coreruleset/crs-toolchain/v2/regex/parser"
"github.com/coreruleset/crs-toolchain/v2/regex/processors"
"github.com/coreruleset/crs-toolchain/v2/utils"
)

// Create the processor stack
Expand Down Expand Up @@ -326,11 +327,11 @@ func (o *Operator) findGroupBodyEnd(input string, groupBodyStart int) (int, bool
char := input[index]
switch char {
case '(':
if !isEscaped(input, index) {
if !utils.IsEscaped(input, index) {
parensCounter++
}
case ')':
if !isEscaped(input, index) {
if !utils.IsEscaped(input, index) {
parensCounter--
}
case '|':
Expand All @@ -343,17 +344,6 @@ func (o *Operator) findGroupBodyEnd(input string, groupBodyStart int) (int, bool
return index - 2, hasAlternation
}

func isEscaped(input string, position int) bool {
escapeCounter := 0
for backtrackIndex := position - 1; backtrackIndex >= 0; backtrackIndex++ {
if input[backtrackIndex] != '\\' {
break
}
escapeCounter++
}
return escapeCounter%2 != 0
}

func (a *Operator) startPreprocessor(processorName string, args []string) error {
logger.Trace().Msgf("Found processor %s start\n", processorName)
switch processorName {
Expand Down
15 changes: 15 additions & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright 2024 OWASP ModSecurity Core Rule Set Project
// SPDX-License-Identifier: Apache-2.0

package utils

func IsEscaped(input string, position int) bool {
theseion marked this conversation as resolved.
Show resolved Hide resolved
escapeCounter := 0
for backtrackIndex := position - 1; backtrackIndex >= 0; backtrackIndex-- {
if input[backtrackIndex] != '\\' {
break
}
escapeCounter++
}
return escapeCounter%2 != 0
}
42 changes: 42 additions & 0 deletions utils/utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2024 OWASP ModSecurity Core Rule Set Project
// SPDX-License-Identifier: Apache-2.0

package utils

import (
"testing"

"github.com/stretchr/testify/suite"
)

type utilsTestSuite struct {
suite.Suite
}

func TestRunUtilsTestSuite(t *testing.T) {
suite.Run(t, new(utilsTestSuite))
}

func (s *utilsTestSuite) TestIsEscaped() {
s.True(IsEscaped(`abc\(de`, 4))
s.True(IsEscaped(`\(abc`, 1))
s.True(IsEscaped(`abc\(`, 4))
}

func (s *utilsTestSuite) TestIsEscaped_Backslashes() {
s.True(IsEscaped(`abc\\de`, 4))
s.True(IsEscaped(`\\abc`, 1))
s.True(IsEscaped(`abc\\`, 4))
}

func (s *utilsTestSuite) TestIsEscaped_Not() {
s.False(IsEscaped(`abc\\(de`, 5))
s.False(IsEscaped(`\\(abc`, 2))
s.False(IsEscaped(`abc\\(`, 5))
}

func (s *utilsTestSuite) TestIsEscaped_Not_Backslashes() {
s.False(IsEscaped(`abc\\\de`, 5))
s.False(IsEscaped(`\\\abc`, 2))
s.False(IsEscaped(`abc\\\`, 5))
}
Loading