Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extra checks #56

Draft
wants to merge 27 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9ede9d3
Check DISTINCT aggregates arguments
gwenn May 9, 2024
230b36f
Check CTE column count
gwenn May 9, 2024
1da77dc
Check unknown join type
gwenn May 9, 2024
fc9748d
Check no tables specified
gwenn May 9, 2024
879b4a8
Check UPDATE FROM target
gwenn May 9, 2024
0003c25
Check AUTOINCREMENT
gwenn May 11, 2024
298f4d0
Check GENERATED column
gwenn May 11, 2024
df637dc
Check GENERATED column
gwenn May 11, 2024
50a74ac
cannot use RETURNING in a trigger
gwenn May 11, 2024
e9906d5
Check object name reserved for internal use
gwenn May 11, 2024
e05850d
Introduce ColFlags
gwenn May 20, 2024
c9b26ca
Introduce TabFlags
gwenn May 20, 2024
b1afe06
Fix is_reserved implementation
gwenn May 20, 2024
25dc5f2
Ignore quotes while checking column type
gwenn May 20, 2024
f561577
Check unsupported use of NULLS
gwenn Jun 30, 2024
2bca8e4
Check table has more than one primary key
gwenn Jul 3, 2024
ae8bc0c
Merge remote-tracking branch 'origin/master' into extra_checks
gwenn Jul 20, 2024
edfc24d
Merge remote-tracking branch 'origin/master' into extra_checks
gwenn Aug 4, 2024
c5ea195
Fix sql_cmds example
gwenn Aug 4, 2024
57807ef
Merge remote-tracking branch 'origin/master' into extra_checks
gwenn Sep 30, 2024
e19624c
Merge remote-tracking branch 'origin/master' into extra_checks
gwenn Oct 8, 2024
b5f8d16
Compute (line, column) position only on error
gwenn Oct 20, 2024
013aa7c
Oops
gwenn Oct 20, 2024
68da13b
Merge remote-tracking branch 'origin/master' into extra_checks
gwenn Nov 1, 2024
24ce015
Fix clippy warnings
gwenn Nov 1, 2024
531b72e
Check GROUP BY out of range
gwenn Nov 1, 2024
ee93c3f
Check ORDER BY out of range
gwenn Nov 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions checks.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ Parse error: must have at least one non-generated column

```sql
sqlite> CREATE TABLE t(a REFERENCES o(a,b));
Parse error: foreign key on a should reference only one column of table o
Parse error: foreign key on a should reference only one column of table o -- done
CREATE TABLE t(a REFERENCES o(a,b));
error here ---^
sqlite> CREATE TABLE t(a PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY
Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY -- done
sqlite> CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID;
Parse error: AUTOINCREMENT not allowed on WITHOUT ROWID tables
```
Expand All @@ -62,15 +62,6 @@ sqlite> CREATE TABLE test (a, b, FOREIGN KEY (b) REFERENCES test(a,b));
Parse error: number of columns in foreign key does not match the number of columns in the referenced table
```

```sql
sqlite> create table test (a,b, primary key(a), primary key(b));
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b primary key);
Parse error: table "test" has more than one primary key
sqlite> create table test (a primary key, b, primary key(a));
Parse error: table "test" has more than one primary key
```

### `HAVING`

- [x] HAVING clause on a non-aggregate query (`GroupBy::having`): grammar already prevents this case (grammar differs from SQLite official grammar).
Expand Down Expand Up @@ -115,7 +106,7 @@ Parse error: no such column: j

```sql
sqlite> CREATE TABLE test (n, m);
sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO
sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO, done
sqlite> SELECT * FROM test;
1|1
sqlite> UPDATE test SET n = 1, n = 0; -- pgsql KO
Expand Down
4 changes: 2 additions & 2 deletions examples/sql_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ fn main() {
eprintln!(
"Check Err in {}:{}, {} in\n{}\n{:?}",
arg,
parser.line(),
parser.position(),
err,
input,
cmd
);
}
Ok(None) => {
eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd);
eprintln!("Check Err in {}:{}, {:?}", arg, parser.position(), cmd);
}
Ok(Some(check)) => {
if cmd != check {
Expand Down
2 changes: 1 addition & 1 deletion examples/sql_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn main() {
if res.is_err() {
eprintln!("Err: {} in {}", res.unwrap_err(), arg);
}*/
debug_assert!(token.iter().all(u8::is_ascii_digit))
debug_assert!(token.iter().all(|b| b.is_ascii_digit() || *b == b'_'))
}
}
TK_FLOAT => {
Expand Down
75 changes: 42 additions & 33 deletions src/lexer/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,40 @@ use std::error::Error;
use std::fmt;
use std::io;

/// Position of a byte offset within an input buffer, expressed as a
/// 1-based line number and a 1-based column number.
///
/// A position is computed on demand from the input and an offset (see
/// [`Pos::from`]) rather than being tracked incrementally.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Pos {
    /// line number (starts at 1)
    pub line: usize,
    /// column number (starts at 1; byte offset, not char offset)
    pub column: usize,
}

impl Pos {
    /// Compute the position reached after consuming the first `offset` bytes
    /// of `input`.
    ///
    /// Each `\n` byte starts a new line and resets the column to 1; every
    /// other byte advances the column by one. Columns therefore count bytes,
    /// not characters.
    ///
    /// An `offset` greater than `input.len()` is clamped to the end of the
    /// input instead of panicking.
    pub fn from(input: &[u8], offset: usize) -> Self {
        // Clamp so that a stale or foreign offset cannot slice out of bounds.
        let consumed = &input[..offset.min(input.len())];
        let line = 1 + consumed.iter().filter(|&&b| b == b'\n').count();
        // The column is the distance from the last newline (or from the start
        // of the input when there is none), counted from 1.
        let column = match consumed.iter().rposition(|&b| b == b'\n') {
            Some(newline) => consumed.len() - newline,
            None => consumed.len() + 1,
        };
        Self { line, column }
    }
}

impl fmt::Display for Pos {
    /// Format as `line: <line>, column: <column>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "line: {}, column: {}", self.line, self.column)
    }
}

/// Error with position
pub trait ScanError: Error + From<io::Error> + Sized {
/// Update the position where the error occurs
fn position(&mut self, line: u64, column: usize);
fn position(&mut self, p: Pos);
}

/// The `(&[u8], TokenType)` is the token.
Expand All @@ -23,7 +53,7 @@ pub trait Splitter: Sized {
type Error: ScanError;
//type Item: ?Sized;
/// Token generated
type TokenType;
type TokenType: std::fmt::Debug;

/// The arguments are an initial substring of the remaining unprocessed
/// data.
Expand All @@ -49,54 +79,42 @@ pub struct Scanner<S: Splitter> {
/// offset in `input`
offset: usize,
/// mark
mark: (usize, u64, usize),
mark: usize,
/// The function to tokenize the input.
splitter: S,
/// current line number
line: u64,
/// current column number (byte offset, not char offset)
column: usize,
}

impl<S: Splitter> Scanner<S> {
/// Constructor
pub fn new(splitter: S) -> Self {
Self {
offset: 0,
mark: (0, 0, 0),
mark: 0,
splitter,
line: 1,
column: 1,
}
}

/// Current line number
pub fn line(&self) -> u64 {
self.line
/// Current position
///
/// Returns the (line, column) of the scanner's current offset within
/// `input`. The position is not cached: it is recomputed by `Pos::from`
/// from the start of `input` up to the current offset on every call.
/// NOTE(review): `input` is presumably the same buffer that was passed to
/// `scan`; confirm at call sites.
pub fn position(&self, input: &[u8]) -> Pos {
Pos::from(input, self.offset)
}

/// Current column number (byte offset, not char offset)
pub fn column(&self) -> usize {
self.column
}
/// Associated splitter
///
/// Returns a shared reference to the splitter this scanner was
/// constructed with (the function used to tokenize the input).
pub fn splitter(&self) -> &S {
&self.splitter
}
/// Mark current position
pub fn mark(&mut self) {
self.mark = (self.offset, self.line, self.column);
self.mark = self.offset;
}
/// Reset to mark
pub fn reset_to_mark(&mut self) {
(self.offset, self.line, self.column) = self.mark;
self.offset = self.mark;
}

/// Reset the scanner such that it behaves as if it had never been used.
pub fn reset(&mut self) {
self.offset = 0;
self.line = 1;
self.column = 1;
}
}

Expand All @@ -112,15 +130,15 @@ impl<S: Splitter> Scanner<S> {
&mut self,
input: &'input [u8],
) -> ScanResult<'input, S::TokenType, S::Error> {
debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
debug!(target: "scanner", "scan({})", Pos::from(input, self.offset));
// Loop until we have a token.
loop {
// See if we can get a token with what we already have.
if self.offset < input.len() {
let data = &input[self.offset..];
match self.splitter.split(data) {
Err(mut e) => {
e.position(self.line, self.column);
e.position(Pos::from(input, self.offset));
return Err(e);
}
Ok((None, 0)) => {
Expand All @@ -134,6 +152,7 @@ impl<S: Splitter> Scanner<S> {
Ok((tok, amt)) => {
let start = self.offset;
self.consume(data, amt);
debug!(target: "scanner", "scan(start: {}, tok: {:?}, offset: {})", start, tok, self.offset);
return Ok((start, tok, self.offset));
}
}
Expand All @@ -148,14 +167,6 @@ impl<S: Splitter> Scanner<S> {
fn consume(&mut self, data: &[u8], amt: usize) {
debug!(target: "scanner", "consume({})", amt);
debug_assert!(amt <= data.len());
for byte in &data[..amt] {
if *byte == b'\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
self.offset += amt;
}
}
Expand All @@ -165,8 +176,6 @@ impl<S: Splitter> fmt::Debug for Scanner<S> {
f.debug_struct("Scanner")
.field("offset", &self.offset)
.field("mark", &self.mark)
.field("line", &self.line)
.field("column", &self.column)
.finish()
}
}
64 changes: 32 additions & 32 deletions src/lexer/sql/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::error;
use std::fmt;
use std::io;

use crate::lexer::scan::ScanError;
use crate::lexer::scan::{Pos, ScanError};
use crate::parser::ParserError;

/// SQL lexer and parser errors
Expand All @@ -12,49 +12,49 @@ pub enum Error {
/// I/O Error
Io(io::Error),
/// Lexer error
UnrecognizedToken(Option<(u64, usize)>),
UnrecognizedToken(Option<Pos>),
/// Missing quote or double-quote or backtick
UnterminatedLiteral(Option<(u64, usize)>),
UnterminatedLiteral(Option<Pos>),
/// Missing `]`
UnterminatedBracket(Option<(u64, usize)>),
UnterminatedBracket(Option<Pos>),
/// Missing `*/`
UnterminatedBlockComment(Option<(u64, usize)>),
UnterminatedBlockComment(Option<Pos>),
/// Invalid parameter name
BadVariableName(Option<(u64, usize)>),
BadVariableName(Option<Pos>),
/// Invalid number format
BadNumber(Option<(u64, usize)>),
BadNumber(Option<Pos>),
/// Invalid or missing sign after `!`
ExpectedEqualsSign(Option<(u64, usize)>),
ExpectedEqualsSign(Option<Pos>),
/// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character.
MalformedBlobLiteral(Option<(u64, usize)>),
MalformedBlobLiteral(Option<Pos>),
/// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits.
MalformedHexInteger(Option<(u64, usize)>),
MalformedHexInteger(Option<Pos>),
/// Grammar error
ParserError(ParserError, Option<(u64, usize)>),
ParserError(ParserError, Option<Pos>),
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
match self {
Self::Io(ref err) => err.fmt(f),
Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {pos:?}"),
Self::UnterminatedLiteral(pos) => {
write!(f, "non-terminated literal at {:?}", pos.unwrap())
write!(f, "non-terminated literal at {pos:?}")
}
Self::UnterminatedBracket(pos) => {
write!(f, "non-terminated bracket at {:?}", pos.unwrap())
write!(f, "non-terminated bracket at {pos:?}")
}
Self::UnterminatedBlockComment(pos) => {
write!(f, "non-terminated block comment at {:?}", pos.unwrap())
write!(f, "non-terminated block comment at {pos:?}")
}
Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
Self::BadVariableName(pos) => write!(f, "bad variable name at {pos:?}"),
Self::BadNumber(pos) => write!(f, "bad number at {pos:?}"),
Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {pos:?}"),
Self::MalformedBlobLiteral(pos) => {
write!(f, "malformed blob literal at {:?}", pos.unwrap())
write!(f, "malformed blob literal at {pos:?}")
}
Self::MalformedHexInteger(pos) => {
write!(f, "malformed hex integer at {:?}", pos.unwrap())
write!(f, "malformed hex integer at {pos:?}")
}
Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"),
Self::ParserError(ref msg, _) => write!(f, "{msg}"),
Expand All @@ -77,19 +77,19 @@ impl From<ParserError> for Error {
}

impl ScanError for Error {
fn position(&mut self, line: u64, column: usize) {
fn position(&mut self, p: Pos) {
match *self {
Self::Io(_) => {}
Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
Self::BadVariableName(ref mut pos) => *pos = Some((line, column)),
Self::BadNumber(ref mut pos) => *pos = Some((line, column)),
Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
Self::ParserError(_, ref mut pos) => *pos = Some((line, column)),
Self::UnrecognizedToken(ref mut pos) => *pos = Some(p),
Self::UnterminatedLiteral(ref mut pos) => *pos = Some(p),
Self::UnterminatedBracket(ref mut pos) => *pos = Some(p),
Self::UnterminatedBlockComment(ref mut pos) => *pos = Some(p),
Self::BadVariableName(ref mut pos) => *pos = Some(p),
Self::BadNumber(ref mut pos) => *pos = Some(p),
Self::ExpectedEqualsSign(ref mut pos) => *pos = Some(p),
Self::MalformedBlobLiteral(ref mut pos) => *pos = Some(p),
Self::MalformedHexInteger(ref mut pos) => *pos = Some(p),
Self::ParserError(_, ref mut pos) => *pos = Some(p),
}
}
}
Loading