gwenn · gwenn · May 9, 2024 · May 9, 2024 · May 9, 2024 · May 9, 2024
diff --git a/checks.md b/checks.md
@@ -46,11 +46,11 @@ Parse error: must have at least one non-generated column
 
 ```sql
 sqlite> CREATE TABLE t(a REFERENCES o(a,b));
-Parse error: foreign key on a should reference only one column of table o
+Parse error: foreign key on a should reference only one column of table o -- done
   CREATE TABLE t(a REFERENCES o(a,b));
                 error here ---^
 sqlite> CREATE TABLE t(a PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
-Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY
+Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY -- done
 sqlite> CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
 Parse error: AUTOINCREMENT not allowed on WITHOUT ROWID tables
 ```
@@ -62,15 +62,6 @@ sqlite> CREATE TABLE test (a, b, FOREIGN KEY (b) REFERENCES test(a,b));
 Parse error: number of columns in foreign key does not match the number of columns in the referenced table
 ```
 
-```sql
-sqlite> create table test (a,b, primary key(a), primary key(b));
-Parse error: table "test" has more than one primary key
-sqlite> create table test (a primary key, b primary key);
-Parse error: table "test" has more than one primary key
-sqlite> create table test (a primary key, b, primary key(a));
-Parse error: table "test" has more than one primary key
-```
-
 ### `HAVING`
 
 - [x] HAVING clause on a non-aggregate query (`GroupBy::having`): grammar already prevents this case (grammar differs from SQLite official grammar).
@@ -115,7 +106,7 @@ Parse error: no such column: j
 
 ```sql
 sqlite> CREATE TABLE test (n, m);
-sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO
+sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO, done
 sqlite> SELECT * FROM test;
 1|1
 sqlite> UPDATE test SET n = 1, n = 0; -- pgsql KO

diff --git a/examples/sql_check.rs b/examples/sql_check.rs
@@ -29,14 +29,14 @@ fn main() {
                                 eprintln!(
                                     "Check Err in {}:{}, {} in\n{}\n{:?}",
                                     arg,
-                                    parser.line(),
+                                    parser.position(),
                                     err,
                                     input,
                                     cmd
                                 );
                             }
                             Ok(None) => {
-                                eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd);
+                                eprintln!("Check Err in {}:{}, {:?}", arg, parser.position(), cmd);
                             }
                             Ok(Some(check)) => {
                                 if cmd != check {

diff --git a/examples/sql_tokens.rs b/examples/sql_tokens.rs
@@ -50,7 +50,7 @@ fn main() {
                             if res.is_err() {
                                 eprintln!("Err: {} in {}", res.unwrap_err(), arg);
                             }*/
-                            debug_assert!(token.iter().all(u8::is_ascii_digit))
+                            debug_assert!(token.iter().all(|b| b.is_ascii_digit() || *b == b'_'))
                         }
                     }
                     TK_FLOAT => {

diff --git a/src/lexer/scan.rs b/src/lexer/scan.rs
@@ -6,10 +6,40 @@ use std::error::Error;
 use std::fmt;
 use std::io;
 
+/// Position of a byte in the scanned input, as a line and column pair
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Pos {
+    /// line number (starts at 1)
+    pub line: usize,
+    /// column number (starts at 1; byte offset, not char offset)
+    pub column: usize,
+}
+
+impl Pos {
+    /// Compute the line and column of the byte at `offset` in `input`.
+    ///
+    /// Only `\n` ends a line. An `offset` past the end of `input` is
+    /// clamped to `input.len()` instead of panicking.
+    pub fn from(input: &[u8], offset: usize) -> Self {
+        let consumed = &input[..offset.min(input.len())];
+        // Each `\n` consumed so far opens a new line.
+        let line = 1 + consumed.iter().filter(|b| **b == b'\n').count();
+        // The current line begins just after the last `\n` (or at offset 0).
+        let line_start = consumed.iter().rposition(|b| *b == b'\n').map_or(0, |i| i + 1);
+        Self { line, column: 1 + consumed.len() - line_start }
+    }
+}
+
+impl fmt::Display for Pos {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "line: {}, column: {}", self.line, self.column) // e.g. `line: 3, column: 7`
+    }
+}
+
 /// Error with position
 pub trait ScanError: Error + From<io::Error> + Sized {
     /// Update the position where the error occurs
-    fn position(&mut self, line: u64, column: usize);
+    fn position(&mut self, p: Pos); // `p`: line and column to record on the error
 }
 
 /// The `(&[u8], TokenType)` is the token.
@@ -23,7 +53,7 @@ pub trait Splitter: Sized {
     type Error: ScanError;
     //type Item: ?Sized;
     /// Token generated
-    type TokenType;
+    type TokenType: std::fmt::Debug;
 
     /// The arguments are an initial substring of the remaining unprocessed
     /// data.
@@ -49,54 +79,42 @@ pub struct Scanner<S: Splitter> {
     /// offset in `input`
     offset: usize,
     /// mark
-    mark: (usize, u64, usize),
+    mark: usize,
     /// The function to tokenize the input.
     splitter: S,
-    /// current line number
-    line: u64,
-    /// current column number (byte offset, not char offset)
-    column: usize,
 }
 
 impl<S: Splitter> Scanner<S> {
     /// Constructor
     pub fn new(splitter: S) -> Self {
         Self {
             offset: 0,
-            mark: (0, 0, 0),
+            mark: 0, // byte offset only; line and column are no longer stored
             splitter,
-            line: 1,
-            column: 1,
         }
     }
 
-    /// Current line number
-    pub fn line(&self) -> u64 {
-        self.line
+    /// Current position: line and column of the current offset within `input`
+    pub fn position(&self, input: &[u8]) -> Pos {
+        Pos::from(input, self.offset)
     }
 
-    /// Current column number (byte offset, not char offset)
-    pub fn column(&self) -> usize {
-        self.column
-    }
     /// Associated splitter
     pub fn splitter(&self) -> &S {
         &self.splitter
     }
     /// Mark current position
     pub fn mark(&mut self) {
-        self.mark = (self.offset, self.line, self.column);
+        self.mark = self.offset;
     }
     /// Reset to mark
     pub fn reset_to_mark(&mut self) {
-        (self.offset, self.line, self.column) = self.mark;
+        self.offset = self.mark;
     }
 
     /// Reset the scanner such that it behaves as if it had never been used.
     pub fn reset(&mut self) {
         self.offset = 0;
-        self.line = 1;
-        self.column = 1;
     }
 }
 
@@ -112,15 +130,15 @@ impl<S: Splitter> Scanner<S> {
         &mut self,
         input: &'input [u8],
     ) -> ScanResult<'input, S::TokenType, S::Error> {
-        debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
+        debug!(target: "scanner", "scan({})", Pos::from(input, self.offset));
         // Loop until we have a token.
         loop {
             // See if we can get a token with what we already have.
             if self.offset < input.len() {
                 let data = &input[self.offset..];
                 match self.splitter.split(data) {
                     Err(mut e) => {
-                        e.position(self.line, self.column);
+                        e.position(Pos::from(input, self.offset));
                         return Err(e);
                     }
                     Ok((None, 0)) => {
@@ -134,6 +152,7 @@ impl<S: Splitter> Scanner<S> {
                     Ok((tok, amt)) => {
                         let start = self.offset;
                         self.consume(data, amt);
+                        debug!(target: "scanner", "scan(start: {}, tok: {:?}, offset: {})", start, tok, self.offset);
                         return Ok((start, tok, self.offset));
                     }
                 }
@@ -148,14 +167,6 @@ impl<S: Splitter> Scanner<S> {
     fn consume(&mut self, data: &[u8], amt: usize) {
         debug!(target: "scanner", "consume({})", amt);
         debug_assert!(amt <= data.len());
-        for byte in &data[..amt] {
-            if *byte == b'\n' {
-                self.line += 1;
-                self.column = 1;
-            } else {
-                self.column += 1;
-            }
-        }
         self.offset += amt;
     }
 }
@@ -165,8 +176,6 @@ impl<S: Splitter> fmt::Debug for Scanner<S> {
         f.debug_struct("Scanner")
             .field("offset", &self.offset)
             .field("mark", &self.mark)
-            .field("line", &self.line)
-            .field("column", &self.column)
             .finish()
     }
 }
diff --git a/src/lexer/sql/error.rs b/src/lexer/sql/error.rs
@@ -2,7 +2,7 @@ use std::error;
 use std::fmt;
 use std::io;
 
-use crate::lexer::scan::ScanError;
+use crate::lexer::scan::{Pos, ScanError};
 use crate::parser::ParserError;
 
 /// SQL lexer and parser errors
@@ -12,49 +12,49 @@ pub enum Error {
     /// I/O Error
     Io(io::Error),
     /// Lexer error
-    UnrecognizedToken(Option<(u64, usize)>),
+    UnrecognizedToken(Option<Pos>),
     /// Missing quote or double-quote or backtick
-    UnterminatedLiteral(Option<(u64, usize)>),
+    UnterminatedLiteral(Option<Pos>),
     /// Missing `]`
-    UnterminatedBracket(Option<(u64, usize)>),
+    UnterminatedBracket(Option<Pos>),
     /// Missing `*/`
-    UnterminatedBlockComment(Option<(u64, usize)>),
+    UnterminatedBlockComment(Option<Pos>),
     /// Invalid parameter name
-    BadVariableName(Option<(u64, usize)>),
+    BadVariableName(Option<Pos>),
     /// Invalid number format
-    BadNumber(Option<(u64, usize)>),
+    BadNumber(Option<Pos>),
     /// Invalid or missing sign after `!`
-    ExpectedEqualsSign(Option<(u64, usize)>),
+    ExpectedEqualsSign(Option<Pos>),
     /// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
-    MalformedBlobLiteral(Option<(u64, usize)>),
+    MalformedBlobLiteral(Option<Pos>),
     /// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
-    MalformedHexInteger(Option<(u64, usize)>),
+    MalformedHexInteger(Option<Pos>),
     /// Grammar error
-    ParserError(ParserError, Option<(u64, usize)>),
+    ParserError(ParserError, Option<Pos>),
 }
 
 impl fmt::Display for Error {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match *self {
+        match self {
             Self::Io(ref err) => err.fmt(f),
-            Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
+            Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {pos:?}"),
             Self::UnterminatedLiteral(pos) => {
-                write!(f, "non-terminated literal at {:?}", pos.unwrap())
+                write!(f, "non-terminated literal at {pos:?}")
             }
             Self::UnterminatedBracket(pos) => {
-                write!(f, "non-terminated bracket at {:?}", pos.unwrap())
+                write!(f, "non-terminated bracket at {pos:?}")
             }
             Self::UnterminatedBlockComment(pos) => {
-                write!(f, "non-terminated block comment at {:?}", pos.unwrap())
+                write!(f, "non-terminated block comment at {pos:?}")
             }
-            Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
-            Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
-            Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
+            Self::BadVariableName(pos) => write!(f, "bad variable name at {pos:?}"),
+            Self::BadNumber(pos) => write!(f, "bad number at {pos:?}"),
+            Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {pos:?}"),
             Self::MalformedBlobLiteral(pos) => {
-                write!(f, "malformed blob literal at {:?}", pos.unwrap())
+                write!(f, "malformed blob literal at {pos:?}")
             }
             Self::MalformedHexInteger(pos) => {
-                write!(f, "malformed hex integer at {:?}", pos.unwrap())
+                write!(f, "malformed hex integer at {pos:?}")
             }
             Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
             Self::ParserError(ref msg, _) => write!(f, "{msg}"),
@@ -77,19 +77,19 @@ impl From<ParserError> for Error {
 }
 
 impl ScanError for Error {
-    fn position(&mut self, line: u64, column: usize) {
+    fn position(&mut self, p: Pos) { // store `p` in every variant that carries a position
         match *self {
             Self::Io(_) => {}
-            Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
-            Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
-            Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
-            Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
-            Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
-            Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
-            Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
-            Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
-            Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
-            Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
+            Self::UnrecognizedToken(ref mut pos) => *pos = Some(p),
+            Self::UnterminatedLiteral(ref mut pos) => *pos = Some(p),
+            Self::UnterminatedBracket(ref mut pos) => *pos = Some(p),
+            Self::UnterminatedBlockComment(ref mut pos) => *pos = Some(p),
+            Self::BadVariableName(ref mut pos) => *pos = Some(p),
+            Self::BadNumber(ref mut pos) => *pos = Some(p),
+            Self::ExpectedEqualsSign(ref mut pos) => *pos = Some(p),
+            Self::MalformedBlobLiteral(ref mut pos) => *pos = Some(p),
+            Self::MalformedHexInteger(ref mut pos) => *pos = Some(p),
+            Self::ParserError(_, ref mut pos) => *pos = Some(p),
         }
     }
 }