Skip to content

Commit

Permalink
Fix identifier matching, improve lexer performance by ~25%
Browse files Browse the repository at this point in the history
  • Loading branch information
JanJakes committed Oct 2, 2024
1 parent 01241b8 commit 3d9671b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 651 deletions.
19 changes: 13 additions & 6 deletions custom-parser/parser/MySQLLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ class MySQLLexer {
const SQL_MODE_IGNORE_SPACE = 1 << 3;
const SQL_MODE_NO_BACKSLASH_ESCAPES = 1 << 4;

/**
* Regex fragment (no delimiters, no modifiers) for matching a MySQL unquoted identifier.
*
* MySQL unquoted identifiers: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
* 1. Allowed characters are ASCII a-z, A-Z, 0-9, $, _ and Unicode \x{0080}-\x{ffff}.
* 2. Unquoted identifiers may begin with a digit but may not consist solely of digits.
*
* How the fragment is built:
* - `(?=\D)` is a zero-width lookahead that requires the FIRST character to be a
*   non-digit; it does not inspect the rest of the match.
* - `[\w_$\x{80}-\x{ffff}]+` then consumes one or more allowed characters. The
*   explicit `_` is redundant because `\w` already includes the underscore.
* - The `\x{80}-\x{ffff}` range needs the pattern to be compiled with the `u`
*   modifier; the caller must also add delimiters and any anchoring (e.g. `\G`).
*
* NOTE(review): because of `(?=\D)`, this fragment never matches an identifier
* that begins with a digit (e.g. `1abc`), which is narrower than rule 2 above.
* Presumably digit-leading input is routed to the numeric branch of the lexer
* before this pattern is tried - confirm that such identifiers are still handled.
*/
const PATTERN_UNQUOTED_IDENTIFIER = '(?=\D)[\w_$\x{80}-\x{ffff}]+';

// Constants for token types.
// Operators
public const EQUAL_OPERATOR = 1;
Expand Down Expand Up @@ -1129,7 +1136,11 @@ private function nextToken()
$this->NUMBER();
} elseif (($la === 'x' || $la === 'X' || $la === 'b' || $la === 'B') && $this->LA(2) === "'") {
$this->NUMBER();
} elseif (safe_ctype_alpha($la)) {
} elseif (preg_match('/\G' . self::PATTERN_UNQUOTED_IDENTIFIER . '/u', $this->input, $matches, 0, $this->position)) {
$this->text = $matches[0];
$this->position += strlen($this->text);
$this->c = $this->input[$this->position] ?? null;
$this->n = $this->input[$this->position + 1] ?? null;
$this->IDENTIFIER_OR_KEYWORD();
} elseif ($la === null) {
$this->matchEOF();
Expand Down Expand Up @@ -3090,11 +3101,7 @@ protected function emitDot(): void

protected function IDENTIFIER_OR_KEYWORD()
{
// Match the longest possible keyword.
while (safe_ctype_alnum($this->LA(1)) || $this->LA(1) === '_' || $this->LA(1) === '$') {
$this->consume();
}
$text = strtoupper($this->getText());
$text = strtoupper($this->getText());

// Lookup the string in the token table.
$this->type = self::TOKENS[$text] ?? self::IDENTIFIER;
Expand Down
Loading

0 comments on commit 3d9671b

Please sign in to comment.