Skip to content

Commit

Permalink
nom-sql: Add parsing for EXTRACT built-in
Browse files Browse the repository at this point in the history
This commit adds parsing for the built-in `EXTRACT` function. This
function is present in both MySQL and PostgreSQL, but the supported
fields across the two databases are different. To keep things simple and
scoped, only support for the PostgreSQL fields has been added.

Change-Id: Ic73ef858478e73b6c466695a84ddb0266d881e92
  • Loading branch information
ethowitz authored and vassili-zarouba committed Jul 12, 2024
1 parent 7bf77a8 commit b6e9453
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 7 deletions.
5 changes: 4 additions & 1 deletion nom-sql/src/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ impl<'a> ReferredColumnsIter<'a> {
Avg { expr, .. } => self.visit_expr(expr),
Count { expr, .. } => self.visit_expr(expr),
CountStar => None,
Extract { expr, .. } => self.visit_expr(expr),
Sum { expr, .. } => self.visit_expr(expr),
Max(arg) => self.visit_expr(arg),
Min(arg) => self.visit_expr(arg),
Expand Down Expand Up @@ -206,6 +207,7 @@ impl<'a> ReferredColumnsMut<'a> {
Avg { expr, .. } => self.visit_expr(expr),
Count { expr, .. } => self.visit_expr(expr),
CountStar => None,
Extract { expr, .. } => self.visit_expr(expr),
Sum { expr, .. } => self.visit_expr(expr),
Max(arg) => self.visit_expr(arg),
Min(arg) => self.visit_expr(arg),
Expand Down Expand Up @@ -345,7 +347,8 @@ pub fn is_aggregate(function: &FunctionExpr) -> bool {
| FunctionExpr::Max(_)
| FunctionExpr::Min(_)
| FunctionExpr::GroupConcat { .. } => true,
FunctionExpr::Substring { .. }
FunctionExpr::Extract { .. }
| FunctionExpr::Substring { .. }
// For now, assume all "generic" function calls are not aggregates
| FunctionExpr::Call { .. } => false,
}
Expand Down
1 change: 1 addition & 0 deletions nom-sql/src/analysis/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ pub fn walk_function_expr<'ast, V: Visitor<'ast>>(
FunctionExpr::Max(expr) => visitor.visit_expr(expr.as_ref()),
FunctionExpr::Min(expr) => visitor.visit_expr(expr.as_ref()),
FunctionExpr::GroupConcat { expr, .. } => visitor.visit_expr(expr.as_ref()),
FunctionExpr::Extract { expr, .. } => visitor.visit_expr(expr.as_ref()),
FunctionExpr::Call { arguments, .. } => {
for arg in arguments {
visitor.visit_expr(arg)?;
Expand Down
1 change: 1 addition & 0 deletions nom-sql/src/analysis/visit_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ pub fn walk_function_expr<'ast, V: VisitorMut<'ast>>(
FunctionExpr::Avg { expr, .. } => visitor.visit_expr(expr.as_mut()),
FunctionExpr::Count { expr, .. } => visitor.visit_expr(expr.as_mut()),
FunctionExpr::CountStar => Ok(()),
FunctionExpr::Extract { expr, .. } => visitor.visit_expr(expr.as_mut()),
FunctionExpr::Sum { expr, .. } => visitor.visit_expr(expr.as_mut()),
FunctionExpr::Max(expr) => visitor.visit_expr(expr.as_mut()),
FunctionExpr::Min(expr) => visitor.visit_expr(expr.as_mut()),
Expand Down
183 changes: 182 additions & 1 deletion nom-sql/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::str::FromStr;
use itertools::Itertools;
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::{digit1, line_ending};
use nom::character::complete::{char, digit1, line_ending};
use nom::combinator::{map, map_res, not, opt, peek};
use nom::error::{ErrorKind, ParseError};
use nom::multi::{separated_list0, separated_list1};
Expand Down Expand Up @@ -499,6 +499,127 @@ fn function_call_without_parens(i: LocatedSpan<&[u8]>) -> NomSqlResult<&[u8], Fu
))
}

/// A field keyword that can appear as the first argument of an
/// `EXTRACT(<field> FROM <expr>)` call.
///
/// Only the field names supported by PostgreSQL are represented here.
///
/// `PartialOrd`/`Ord` are derived, so variants compare in declaration order;
/// reordering the variants changes that ordering.
#[derive(
Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize, Arbitrary,
)]
pub enum TimestampField {
Century,
Day,
Decade,
Dow,
Doy,
Epoch,
Hour,
Isodow,
Isoyear,
Julian,
Microseconds,
Millennium,
Milliseconds,
Minute,
Month,
Quarter,
Second,
Timezone,
TimezoneHour,
TimezoneMinute,
Week,
Year,
}

impl fmt::Display for TimestampField {
    /// Writes the upper-case SQL keyword for this field (for example
    /// `TimezoneHour` is rendered as `TIMEZONE_HOUR`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Resolve the keyword first so there is a single write at the end.
        let keyword = match self {
            Self::Century => "CENTURY",
            Self::Day => "DAY",
            Self::Decade => "DECADE",
            Self::Dow => "DOW",
            Self::Doy => "DOY",
            Self::Epoch => "EPOCH",
            Self::Hour => "HOUR",
            Self::Isodow => "ISODOW",
            Self::Isoyear => "ISOYEAR",
            Self::Julian => "JULIAN",
            Self::Microseconds => "MICROSECONDS",
            Self::Millennium => "MILLENNIUM",
            Self::Milliseconds => "MILLISECONDS",
            Self::Minute => "MINUTE",
            Self::Month => "MONTH",
            Self::Quarter => "QUARTER",
            Self::Second => "SECOND",
            Self::Timezone => "TIMEZONE",
            Self::TimezoneHour => "TIMEZONE_HOUR",
            Self::TimezoneMinute => "TIMEZONE_MINUTE",
            Self::Week => "WEEK",
            Self::Year => "YEAR",
        };

        f.write_str(keyword)
    }
}

fn timestamp_field() -> impl Fn(LocatedSpan<&[u8]>) -> NomSqlResult<&[u8], TimestampField> {
move |i| {
let alt1 = alt((
map(tag_no_case("century"), |_| TimestampField::Century),
map(tag_no_case("day"), |_| TimestampField::Day),
map(tag_no_case("decade"), |_| TimestampField::Decade),
map(tag_no_case("dow"), |_| TimestampField::Dow),
map(tag_no_case("doy"), |_| TimestampField::Doy),
map(tag_no_case("epoch"), |_| TimestampField::Epoch),
map(tag_no_case("hour"), |_| TimestampField::Hour),
map(tag_no_case("isodow"), |_| TimestampField::Isodow),
map(tag_no_case("isoyear"), |_| TimestampField::Isoyear),
map(tag_no_case("julian"), |_| TimestampField::Julian),
map(tag_no_case("microseconds"), |_| {
TimestampField::Microseconds
}),
map(tag_no_case("millennium"), |_| TimestampField::Millennium),
map(tag_no_case("milliseconds"), |_| {
TimestampField::Milliseconds
}),
map(tag_no_case("minute"), |_| TimestampField::Minute),
map(tag_no_case("month"), |_| TimestampField::Month),
map(tag_no_case("quarter"), |_| TimestampField::Quarter),
map(tag_no_case("second"), |_| TimestampField::Second),
map(tag_no_case("timezone_hour"), |_| {
TimestampField::TimezoneHour
}),
map(tag_no_case("timezone_minute"), |_| {
TimestampField::TimezoneMinute
}),
map(tag_no_case("timezone"), |_| TimestampField::Timezone),
map(tag_no_case("week"), |_| TimestampField::Week),
));

// `alt` has an upper limit on the number of items it supports in tuples, so we have to
// split the parsing for these fields into separate invocations
alt((alt1, map(tag_no_case("year"), |_| TimestampField::Year)))(i)
}
}

/// Returns a parser for `EXTRACT(<field> FROM <expr>)`.
///
/// Whitespace is optional around the parentheses and required on both sides
/// of the `FROM` keyword. `dialect` is forwarded to the nested expression
/// parser. On success the parser yields a [`FunctionExpr::Extract`].
fn extract(dialect: Dialect) -> impl Fn(LocatedSpan<&[u8]>) -> NomSqlResult<&[u8], FunctionExpr> {
    move |input| {
        // `EXTRACT (`
        let (rest, _) = tag_no_case("EXTRACT")(input)?;
        let (rest, _) = whitespace0(rest)?;
        let (rest, _) = char('(')(rest)?;
        let (rest, _) = whitespace0(rest)?;

        // `<field> FROM <expr>`
        let (rest, field) = timestamp_field()(rest)?;
        let (rest, _) = whitespace1(rest)?;
        let (rest, _) = tag_no_case("FROM")(rest)?;
        let (rest, _) = whitespace1(rest)?;
        let (rest, expr) = expression(dialect)(rest)?;

        // `)`
        let (rest, _) = whitespace0(rest)?;
        char(')')(rest).map(|(rest, _)| {
            (
                rest,
                FunctionExpr::Extract {
                    field,
                    expr: Box::new(expr),
                },
            )
        })
    }
}

fn substring(dialect: Dialect) -> impl Fn(LocatedSpan<&[u8]>) -> NomSqlResult<&[u8], FunctionExpr> {
move |i| {
let (i, _) = alt((tag_no_case("substring"), tag_no_case("substr")))(i)?;
Expand Down Expand Up @@ -603,6 +724,7 @@ pub fn function_expr(
separator,
},
),
extract(dialect),
substring(dialect),
function_call(dialect),
function_call_without_parens,
Expand Down Expand Up @@ -1316,4 +1438,63 @@ mod tests {
assert_eq!(res2, expected);
}
}

mod extract {
    use super::*;

    /// Generates a test module named `$field` for one `TimestampField` variant.
    ///
    /// * `$field` - name of the generated module.
    /// * `$field_variant` - the `TimestampField` variant expected from parsing.
    /// * `$field_expr` - the keyword text placed inside `EXTRACT(... FROM "col")`.
    ///
    /// Each generated module checks that the expression parses to the expected
    /// `FunctionExpr::Extract`, and that displaying the parsed value with the
    /// PostgreSQL dialect reproduces the input text exactly.
    macro_rules! extract_test {
        ($field:ident, $field_variant:ident, $field_expr:expr) => {
            mod $field {
                use super::*;

                #[test]
                fn parse_extract_expr() {
                    let expr = format!("EXTRACT({} FROM \"col\")", $field_expr);
                    assert_eq!(
                        test_parse!(extract(Dialect::PostgreSQL), expr.as_bytes()),
                        FunctionExpr::Extract {
                            field: TimestampField::$field_variant,
                            expr: Box::new(Expr::Column(Column {
                                name: "col".into(),
                                table: None,
                            })),
                        },
                    );
                }

                #[test]
                fn format_round_trip() {
                    let expected = format!("EXTRACT({} FROM \"col\")", $field_expr);
                    let actual = test_parse!(extract(Dialect::PostgreSQL), expected.as_bytes())
                        .display(Dialect::PostgreSQL)
                        .to_string();

                    assert_eq!(expected, actual);
                }
            }
        };
    }

    // One invocation per `TimestampField` variant, in declaration order.
    extract_test!(century, Century, "CENTURY");
    // `Day` was previously the only variant without coverage.
    extract_test!(day, Day, "DAY");
    extract_test!(decade, Decade, "DECADE");
    extract_test!(dow, Dow, "DOW");
    extract_test!(doy, Doy, "DOY");
    extract_test!(epoch, Epoch, "EPOCH");
    extract_test!(hour, Hour, "HOUR");
    extract_test!(isodow, Isodow, "ISODOW");
    extract_test!(isoyear, Isoyear, "ISOYEAR");
    extract_test!(julian, Julian, "JULIAN");
    extract_test!(microseconds, Microseconds, "MICROSECONDS");
    extract_test!(millennium, Millennium, "MILLENNIUM");
    extract_test!(milliseconds, Milliseconds, "MILLISECONDS");
    extract_test!(minute, Minute, "MINUTE");
    extract_test!(month, Month, "MONTH");
    extract_test!(quarter, Quarter, "QUARTER");
    extract_test!(second, Second, "SECOND");
    extract_test!(timezone, Timezone, "TIMEZONE");
    extract_test!(timezone_hour, TimezoneHour, "TIMEZONE_HOUR");
    extract_test!(timezone_minute, TimezoneMinute, "TIMEZONE_MINUTE");
    extract_test!(week, Week, "WEEK");
    extract_test!(year, Year, "YEAR");
}
}
15 changes: 13 additions & 2 deletions nom-sql/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use readyset_util::fmt::fmt_with;
use serde::{Deserialize, Serialize};
use test_strategy::Arbitrary;

use crate::common::{column_identifier_no_alias, function_expr, ws_sep_comma};
use crate::common::{column_identifier_no_alias, function_expr, ws_sep_comma, TimestampField};
use crate::literal::{literal, Double, Float};
use crate::select::nested_selection;
use crate::set::{variable_scope_prefix, Variable};
Expand All @@ -41,6 +41,11 @@ pub enum FunctionExpr {
/// `COUNT(*)` aggregation
CountStar,

Extract {
field: TimestampField,
expr: Box<Expr>,
},

/// `SUM` aggregation
Sum { expr: Box<Expr>, distinct: bool },

Expand Down Expand Up @@ -89,7 +94,8 @@ impl FunctionExpr {
| FunctionExpr::Sum { expr: arg, .. }
| FunctionExpr::Max(arg)
| FunctionExpr::Min(arg)
| FunctionExpr::GroupConcat { expr: arg, .. } => {
| FunctionExpr::GroupConcat { expr: arg, .. }
| FunctionExpr::Extract { expr: arg, .. } => {
concrete_iter!(iter::once(arg.as_ref()))
}
FunctionExpr::CountStar => concrete_iter!(iter::empty()),
Expand Down Expand Up @@ -156,6 +162,9 @@ impl DialectDisplay for FunctionExpr {

write!(f, ")")
}
FunctionExpr::Extract { field, expr } => {
write!(f, "EXTRACT({field} FROM {})", expr.display(dialect))
}
})
}
}
Expand Down Expand Up @@ -690,6 +699,8 @@ impl Arbitrary for Expr {
Just(FunctionExpr::CountStar),
(box_expr.clone(), any::<bool>())
.prop_map(|(expr, distinct)| FunctionExpr::Sum { expr, distinct }),
(box_expr.clone(), any::<TimestampField>())
.prop_map(|(expr, field)| FunctionExpr::Extract { expr, field }),
box_expr.clone().prop_map(FunctionExpr::Max),
box_expr.clone().prop_map(FunctionExpr::Min),
(box_expr.clone(), any::<Option<String>>()).prop_map(|(expr, separator)| {
Expand Down
2 changes: 1 addition & 1 deletion nom-sql/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub use self::alter::{
};
pub use self::column::{Column, ColumnConstraint, ColumnSpecification};
pub use self::comment::CommentStatement;
pub use self::common::{FieldDefinitionExpr, FieldReference, IndexType, TableKey};
pub use self::common::{FieldDefinitionExpr, FieldReference, IndexType, TableKey, TimestampField};
pub use self::compound_select::{CompoundSelectOperator, CompoundSelectStatement};
pub use self::create::{
CacheInner, CreateCacheStatement, CreateTableBody, CreateTableStatement, CreateViewStatement,
Expand Down
2 changes: 1 addition & 1 deletion readyset-server/src/controller/sql/mir/grouped.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ pub(super) fn post_lookup_aggregates(
GroupConcat { separator, .. } => PostLookupAggregateFunction::GroupConcat {
separator: separator.clone().unwrap_or_else(|| ",".to_owned()),
},
Call { .. } | Substring { .. } => continue,
Extract { .. } | Call { .. } | Substring { .. } => continue,
},
});
}
Expand Down
4 changes: 3 additions & 1 deletion readyset-server/src/controller/sql/query_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,9 @@ fn default_row_for_select(st: &SelectStatement) -> Option<Vec<DfValue>> {
FunctionExpr::Max(..) => DfValue::None,
FunctionExpr::Min(..) => DfValue::None,
FunctionExpr::GroupConcat { .. } => DfValue::None,
FunctionExpr::Call { .. } | FunctionExpr::Substring { .. } => DfValue::None,
FunctionExpr::Extract { .. }
| FunctionExpr::Call { .. }
| FunctionExpr::Substring { .. } => DfValue::None,
},
_ => DfValue::None,
})
Expand Down

0 comments on commit b6e9453

Please sign in to comment.