Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(fmt): An attempt at aesthetic items into PL #4639

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ pub use self::lexer::TokenVec;
use crate::error::Error;
use crate::lexer::lr::TokenKind;
use crate::parser::pr::Stmt;
use crate::parser::prepare_stream;

/// Build PRQL AST from a PRQL query string.
pub fn parse_source(source: &str, source_id: u16) -> Result<Vec<Stmt>, Vec<Error>> {
let mut errors = Vec::new();

let (tokens, lex_errors) = ::chumsky::Parser::parse_recovery(&lexer::lexer(), source);
// let (tokens, lex_errors) = ::chumsky::Parser::parse_recovery_verbose(&lexer::lexer(), source);

log::debug!("Lex errors: {:?}", lex_errors);
errors.extend(
Expand All @@ -23,20 +25,13 @@ pub fn parse_source(source: &str, source_id: u16) -> Result<Vec<Stmt>, Vec<Error
.map(|e| lexer::convert_lexer_error(source, e, source_id)),
);

// We don't want comments in the AST (but we do intend to use them as part of
// formatting)
let semantic_tokens: Option<_> = tokens.map(|tokens| {
tokens.into_iter().filter(|token| {
!matches!(
token.kind,
TokenKind::Comment(_) | TokenKind::LineWrap(_) | TokenKind::DocComment(_)
)
})
});
let ast = if let Some(tokens) = tokens {
let stream = prepare_stream(tokens.into_iter(), source, source_id);

let ast = if let Some(semantic_tokens) = semantic_tokens {
let stream = parser::prepare_stream(semantic_tokens, source, source_id);
// let ast = if let Some(semantic_tokens) = semantic_tokens {
// let stream = parser::prepare_stream(semantic_tokens, source, source_id);

// ::chumsky::Parser::parse_recovery_verbose(&stmt::source(), stream);
let (ast, parse_errors) =
::chumsky::Parser::parse_recovery(&parser::stmt::source(), stream);

Expand Down
40 changes: 40 additions & 0 deletions prqlc/prqlc-parser/src/parser/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use chumsky::prelude::*;
use crate::error::parse_error::PError;
use crate::lexer::lr::TokenKind;
use crate::parser::pr::{Annotation, Expr, ExprKind, Stmt, StmtKind, Ty, TyKind};
use crate::parser::WithAesthetics;
use crate::span::Span;

pub fn ident_part() -> impl Parser<TokenKind, String, Error = PError> + Clone {
Expand Down Expand Up @@ -36,6 +37,8 @@ pub fn into_stmt((annotations, kind): (Vec<Annotation>, StmtKind), span: Span) -
kind,
span: Some(span),
annotations,
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}

Expand All @@ -52,3 +55,40 @@ pub fn into_ty(kind: TyKind, span: Span) -> Ty {
..Ty::new(kind)
}
}

/// Parses one "aesthetic" token — a comment, line-wrap, or doc comment.
/// These carry no semantics but are kept so the formatter can reproduce them.
pub fn aesthetic() -> impl Parser<TokenKind, TokenKind, Error = PError> + Clone {
    select! {
        t @ TokenKind::Comment(_) => t,
        t @ TokenKind::LineWrap(_) => t,
        t @ TokenKind::DocComment(_) => t,
    }
}

/// Wraps `parser` so that aesthetic tokens (comments, line-wraps, doc
/// comments) found immediately before and after the item are collected and
/// attached to the parsed value via [`WithAesthetics`].
pub fn with_aesthetics<P, O>(parser: P) -> impl Parser<TokenKind, O, Error = PError> + Clone
where
    P: Parser<TokenKind, O, Error = PError> + Clone,
    O: WithAesthetics,
{
    // Newlines *following* a leading aesthetic token can safely be consumed,
    // covering the blank line after `# foo` here:
    //
    // ```prql
    // # foo
    //
    // from bar
    // # baz
    // select artists
    // ```
    //
    // ...but newlines after trailing aesthetics must be left alone — eating
    // them would swallow the separator between `from bar` and `select artists`.
    let before = aesthetic().then_ignore(new_line().repeated()).repeated();
    let after = aesthetic().separated_by(new_line());

    before
        .then(parser)
        .then(after)
        .map(|((before, inner), after)| inner.with_aesthetics(before, after))
}
32 changes: 18 additions & 14 deletions prqlc/prqlc-parser/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use chumsky::prelude::*;
use super::interpolation;
use crate::error::parse_error::PError;
use crate::lexer::lr::{Literal, TokenKind};
use crate::parser::common::{ctrl, ident_part, into_expr, keyword, new_line};
use crate::parser::common::{ctrl, ident_part, into_expr, keyword, new_line, with_aesthetics};
use crate::parser::pr::ident::Ident;
use crate::parser::pr::ops::{BinOp, UnOp};
use crate::parser::pr::*;
Expand All @@ -29,7 +29,9 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {
.map(|x| x.to_string())
.map(ExprKind::Internal);

let nested_expr = pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed();
let nested_expr = with_aesthetics(
pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed(),
);

let tuple = ident_part()
.then_ignore(ctrl('='))
Expand Down Expand Up @@ -124,18 +126,20 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {

let param = select! { TokenKind::Param(id) => ExprKind::Param(id) };

let term = choice((
literal,
internal,
tuple,
array,
interpolation,
ident_kind,
case,
param,
))
.map_with_span(into_expr)
.or(pipeline)
let term = with_aesthetics(
choice((
literal,
internal,
tuple,
array,
interpolation,
ident_kind,
case,
param,
))
.map_with_span(into_expr)
.or(pipeline),
)
.boxed();

// indirections
Expand Down
4 changes: 4 additions & 0 deletions prqlc/prqlc-parser/src/parser/interpolation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ fn parse_interpolate() {
0:8-9,
),
alias: None,
aesthetics_before: [],
aesthetics_after: [],
},
format: None,
},
Expand Down Expand Up @@ -143,6 +145,8 @@ fn parse_interpolate() {
0:14-15,
),
alias: None,
aesthetics_before: [],
aesthetics_after: [],
},
format: None,
},
Expand Down
8 changes: 8 additions & 0 deletions prqlc/prqlc-parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,11 @@ pub fn prepare_stream(
};
Stream::from_iter(eoi, tokens)
}

/// Implemented by AST nodes that can carry "aesthetic" tokens — comments,
/// line-wraps, and doc comments — collected from just before and just after
/// the node while parsing.
pub trait WithAesthetics {
    /// Returns `self` with the given aesthetic tokens attached.
    fn with_aesthetics(
        self,
        aesthetics_before: Vec<TokenKind>,
        // fixed typo: was `aethetics_after`
        aesthetics_after: Vec<TokenKind>,
    ) -> Self;
}
22 changes: 22 additions & 0 deletions prqlc/prqlc-parser/src/parser/pr/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,18 @@ use crate::lexer::lr::Literal;
use crate::parser::generic;
use crate::parser::pr::ops::{BinOp, UnOp};
use crate::parser::pr::Ty;
use crate::parser::WithAesthetics;
use crate::span::Span;
use crate::TokenKind;

impl Expr {
pub fn new<K: Into<ExprKind>>(kind: K) -> Self {
Expr {
kind: kind.into(),
span: None,
alias: None,
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}
}
Expand All @@ -33,6 +37,24 @@ pub struct Expr {

#[serde(skip_serializing_if = "Option::is_none")]
pub alias: Option<String>,

// Maybe should be Token?
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_before: Vec<TokenKind>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Expr {
    /// Attaches the surrounding aesthetic tokens (comments etc.) to this
    /// expression, replacing any previously attached ones.
    fn with_aesthetics(
        mut self,
        aesthetics_before: Vec<TokenKind>,
        aesthetics_after: Vec<TokenKind>,
    ) -> Self {
        self.aesthetics_before = aesthetics_before;
        self.aesthetics_after = aesthetics_after;
        self
    }
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)]
Expand Down
42 changes: 42 additions & 0 deletions prqlc/prqlc-parser/src/parser/pr/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use serde::{Deserialize, Serialize};

use crate::parser::pr::ident::Ident;
use crate::parser::pr::{Expr, Ty};
use crate::parser::WithAesthetics;
use crate::span::Span;
use crate::TokenKind;

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
pub struct QueryDef {
Expand All @@ -33,6 +35,26 @@ pub struct Stmt {

#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub annotations: Vec<Annotation>,

// Maybe should be Token?
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_before: Vec<TokenKind>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Stmt {
    /// Attaches the surrounding aesthetic tokens (comments etc.) to this
    /// statement, replacing any previously attached ones.
    fn with_aesthetics(
        mut self,
        aesthetics_before: Vec<TokenKind>,
        aesthetics_after: Vec<TokenKind>,
    ) -> Self {
        self.aesthetics_before = aesthetics_before;
        self.aesthetics_after = aesthetics_after;
        self
    }
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -75,6 +97,24 @@ pub struct ImportDef {
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Annotation {
    pub expr: Box<Expr>,
    // Aesthetic tokens (comments, line-wraps, doc comments) adjacent to the
    // annotation. `default` must accompany `skip_serializing_if`: without it,
    // a value serialized with the field omitted fails to deserialize.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub aesthetics_before: Vec<TokenKind>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Annotation {
    /// Attaches the surrounding aesthetic tokens (comments etc.) to this
    /// annotation, replacing any previously attached ones.
    fn with_aesthetics(
        mut self,
        aesthetics_before: Vec<TokenKind>,
        aesthetics_after: Vec<TokenKind>,
    ) -> Self {
        self.aesthetics_before = aesthetics_before;
        self.aesthetics_after = aesthetics_after;
        self
    }
}

impl Stmt {
Expand All @@ -83,6 +123,8 @@ impl Stmt {
kind,
span: None,
annotations: Vec::new(),
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}
}
42 changes: 33 additions & 9 deletions prqlc/prqlc-parser/src/parser/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ use super::common::{ctrl, ident_part, into_stmt, keyword, new_line};
use super::expr::{expr, expr_call, ident, pipeline};
use crate::error::parse_error::PError;
use crate::lexer::lr::{Literal, TokenKind};
use crate::parser::common::with_aesthetics;
use crate::parser::pr::*;
use crate::parser::types::type_expr;

pub fn source() -> impl Parser<TokenKind, Vec<Stmt>, Error = PError> {
query_def()
with_aesthetics(query_def())
.or_not()
.chain(module_contents())
.then_ignore(end())
Expand All @@ -31,15 +32,36 @@ fn module_contents() -> impl Parser<TokenKind, Vec<Stmt>, Error = PError> {
.then_ignore(new_line().repeated())
.map(|expr| Annotation {
expr: Box::new(expr),
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
});

annotation
.repeated()
.then(choice((module_def, type_def(), import_def(), var_def())))
.map_with_span(into_stmt)
.separated_by(new_line().repeated().at_least(1))
.allow_leading()
.allow_trailing()
// TODO: I think some duplication here; we allow for potential
// newlines before each item here, but then also have `.allow_leading`
// below — since now we can get newlines after a comment between the
// aesthetic item and the stmt... So a bit messy
let stmt_kind = new_line().repeated().ignore_then(choice((
module_def,
type_def(),
import_def(),
var_def(),
)));

// Two wrapping of `with_aesthetics` — the first for the whole block,
// and the second for just the annotation; if there's a comment between
// the annotation and the code.
with_aesthetics(
with_aesthetics(annotation)
.repeated()
// TODO: do we need this? I think possibly we get an additional
// error when we remove it; check (because it seems redundant...).
.then_ignore(new_line().repeated())
.then(stmt_kind)
.map_with_span(into_stmt),
)
.separated_by(new_line().repeated().at_least(1))
.allow_leading()
.allow_trailing()
})
}

Expand Down Expand Up @@ -113,7 +135,9 @@ fn query_def() -> impl Parser<TokenKind, Stmt, Error = PError> + Clone {
}

fn var_def() -> impl Parser<TokenKind, StmtKind, Error = PError> + Clone {
let let_ = keyword("let")
let let_ = new_line()
.repeated()
.ignore_then(keyword("let"))
.ignore_then(ident_part())
.then(type_expr().delimited_by(ctrl('<'), ctrl('>')).or_not())
.then(ctrl('=').ignore_then(expr_call()).map(Box::new).or_not())
Expand Down
Loading
Loading