Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add comments to format output #4397

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
9 changes: 9 additions & 0 deletions prqlc/prqlc-ast/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
a.start..a.end
}
}
impl From<Range<usize>> for Span {
fn from(range: Range<usize>) -> Self {
Span {
start: range.start,
end: range.end,
source_id: 0, // Default value as Range<usize> does not provide a source_id
}
}
}

impl Debug for Span {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
Expand Down
4 changes: 2 additions & 2 deletions prqlc/prqlc-parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use prqlc_ast::expr::*;
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize)]
#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]

Check warning on line 10 in prqlc/prqlc-parser/src/lexer.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc-parser/src/lexer.rs#L10

Added line #L10 was not covered by tests
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
Expand Down Expand Up @@ -575,7 +575,7 @@
}
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]

Check warning on line 578 in prqlc/prqlc-parser/src/lexer.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc-parser/src/lexer.rs#L578

Added line #L578 was not covered by tests
pub struct TokenVec(pub Vec<Token>);

// impl std::fmt::Debug for TokenVec {
Expand Down
3 changes: 1 addition & 2 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ use prqlc_ast::error::{Error, Reason, WithErrorInfo};
use prqlc_ast::stmt::*;
use prqlc_ast::Span;

use lexer::Token;
pub use lexer::{TokenKind, TokenVec};
pub use lexer::{Token, TokenKind, TokenVec};
use span::ParserSpan;

/// Build PRQL AST from a PRQL query string.
Expand Down
169 changes: 162 additions & 7 deletions prqlc/prqlc/src/codegen/ast.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use std::collections::HashSet;

use once_cell::sync::Lazy;

use crate::ast::*;
use prqlc_parser::{Token, TokenKind, TokenVec};
use regex::Regex;

use crate::ast::*;
use crate::codegen::SeparatedExprs;

use super::{WriteOpt, WriteSource};
Expand All @@ -17,13 +17,28 @@
let parent_strength = binding_strength(parent);
opt.context_strength = opt.context_strength.max(parent_strength);

node.write(opt)
// FIXME: this is extremely hacky. Our issue is that in:
//
// from a.b # comment
//
// ...we're writing both `from a.b` and `a.b`, so we need to know which of
// these to write comments for. I'm sure there are better ways to do it.
let enable_comments = opt.enable_comments;
opt.enable_comments = false;
let out = node.write(opt.clone());
opt.enable_comments = enable_comments;
out
}

impl WriteSource for Expr {
fn write(&self, mut opt: WriteOpt) -> Option<String> {
let mut r = String::new();

// if let Some(span) = self.span {
// if let Some(comment) = find_comment_before(span, &opt.tokens) {
// r += &comment.to_string();
// }
// }
if let Some(alias) = &self.alias {
r += opt.consume(alias)?;
r += opt.consume(" = ")?;
Expand All @@ -41,9 +56,44 @@
if let Some(value) = value {
r += &value;
} else {
r += &break_line_within_parenthesis(&self.kind, opt)?;
r += &break_line_within_parenthesis(&self.kind, &mut opt)?;
}
};

if opt.enable_comments {
if let Some(span) = self.span {
// TODO: change underlying function so we can remove this
if opt.tokens.0.is_empty() {
return Some(r);
}

let comments = find_comments_after(span, &opt.tokens);

// If the first item is a comment, it's an inline comment, and
// so add two spaces
if matches!(
comments.first(),
Some(Token {
kind: TokenKind::Comment(_),
..
})
) {
r += " ";
}

for c in comments {
match c.kind {
// TODO: these are defined here since the debug
// representations aren't quite right (NewLine is `new
// line` as is used in error messages). But we should
// probably move them onto the Struct.
TokenKind::Comment(s) => r += format!("#{}", s).as_str(),
TokenKind::NewLine => r += "\n",
_ => unreachable!(),

Check warning on line 92 in prqlc/prqlc/src/codegen/ast.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc/src/codegen/ast.rs#L92

Added line #L92 was not covered by tests
}
}
}
}
Some(r)
}
}
Expand Down Expand Up @@ -197,7 +247,7 @@
if let Some(body) = c.body.write(opt.clone()) {
r += &body;
} else {
r += &break_line_within_parenthesis(c.body.as_ref(), opt)?;
r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?;
}

Some(r)
Expand All @@ -222,7 +272,7 @@
}
}

fn break_line_within_parenthesis<T: WriteSource>(expr: &T, mut opt: WriteOpt) -> Option<String> {
fn break_line_within_parenthesis<T: WriteSource>(expr: &T, opt: &mut WriteOpt) -> Option<String> {
let mut r = "(\n".to_string();
opt.indent += 1;
r += &opt.write_indent();
Expand Down Expand Up @@ -321,6 +371,52 @@
}
}

// impl WriteSource for ModuleDef {
// fn write(&self, mut opt: WriteOpt) -> Option<String> {
// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()
// }}

// /// Find a comment before a span. If there's exactly one newline prior, then the
// /// comment is included here. Any further above are included with the prior token.
// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option<TokenKind> {
// // index of the span in the token vec
// let index = tokens
// .0
// .iter()
// .position(|t| t.span.start == span.start && t.span.end == span.end)?;
// if index <= 1 {
// return None;
// }
// let prior_token = &tokens.0[index - 1].kind;
// let prior_2_token = &tokens.0[index - 2].kind;
// if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) {
// Some(prior_2_token.clone())
// } else {
// None
// }
// }

/// Find comments after a given span.
fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec<Token> {
// index of the span in the token vec
let index = tokens
.0
.iter()
// FIXME: why isn't this working?
// .position(|t| t.1.start == span.start && t.1.end == span.end)
Comment on lines +405 to +406
Copy link
Member

@aljazerzen aljazerzen May 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because if expression 1 + x has span=0..5, the tokens will have spans 1: 0..1 +: 2..3 x: 4..5

.position(|t| t.span.end == span.end)
.unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span));

let mut out = vec![];
for token in tokens.0.iter().skip(index + 1) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You probably want this here:

tokens.0.iter().skip_while(|t| t.span.start < span.end)

... skipping all tokens that were before or within the current expression.

match token.kind {
TokenKind::NewLine | TokenKind::Comment(_) => out.push(token.clone()),
_ => break,
}
Comment on lines +412 to +415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, and this is:

.take_while(|token| matches!(token.kind, TokenKind::NewLine | TokenKind::Comment(_))

}
out
}

impl WriteSource for Vec<Stmt> {
fn write(&self, mut opt: WriteOpt) -> Option<String> {
opt.reset_line()?;
Expand Down Expand Up @@ -469,7 +565,8 @@

#[cfg(test)]
mod test {
use insta::assert_snapshot;
use insta::{assert_debug_snapshot, assert_snapshot};
use prqlc_parser::lex_source;

use super::*;

Expand All @@ -490,6 +587,64 @@
stmt.write(WriteOpt::default()).unwrap()
}

// #[test]
// fn test_find_comment_before() {
// let tokens = lex_source(
// r#"
// # comment
// let a = 5
// "#,
// )
// .unwrap();
// let span = tokens
// .clone()
// .0
// .iter()
// .find(|t| t.kind == TokenKind::Keyword("let".to_string()))
// .unwrap()
// .span
// .clone();
// let comment = find_comment_before(span.into(), &tokens);
// assert_debug_snapshot!(comment, @r###"
// Some(
// Comment(
// " comment",
// ),
// )
// "###);
// }

#[test]
fn test_find_comments_after() {
let tokens = lex_source(
r#"
let a = 5 # on side
# below
# and another
"#,
)
.unwrap();
let span = tokens
.clone()
.0
.iter()
.find(|t| t.kind == TokenKind::Literal(Literal::Integer(5)))
.unwrap()
.span
.clone();
let comment = find_comments_after(span.into(), &tokens);
assert_debug_snapshot!(comment, @r###"
[
23..32: Comment(" on side"),
32..33: NewLine,
45..52: Comment(" below"),
52..53: NewLine,
65..78: Comment(" and another"),
78..79: NewLine,
]
"###);
}

#[test]
fn test_pipeline() {
let short = Expr::new(ExprKind::Ident("short".to_string()));
Expand Down
14 changes: 13 additions & 1 deletion prqlc/prqlc/src/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ mod types;
pub(crate) use ast::write_expr;
pub(crate) use types::{write_ty, write_ty_kind};

pub trait WriteSource {
use prqlc_parser::TokenVec;

pub trait WriteSource: std::fmt::Debug {
/// Converts self to its source representation according to specified
/// options.
///
Expand Down Expand Up @@ -72,6 +74,13 @@ pub struct WriteOpt {
/// For example:
/// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op.
pub unbound_expr: bool,

/// The lexer tokens that were used to produce this source; used for
/// comments.
pub tokens: TokenVec,
Comment on lines +81 to +83
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question if we could prematurely optimize this:

Do we need all of the tokens? Wouldn't just comments (and new lines maybe) be enough to get the same functionality?


// TODO: remove
pub enable_comments: bool,
}

impl Default for WriteOpt {
Expand All @@ -84,6 +93,8 @@ impl Default for WriteOpt {
rem_width: 50,
context_strength: 0,
unbound_expr: false,
tokens: TokenVec(vec![]),
enable_comments: true,
}
}
}
Expand Down Expand Up @@ -124,6 +135,7 @@ impl WriteOpt {
}
}

#[derive(Debug, Clone)]
struct SeparatedExprs<'a, T: WriteSource> {
exprs: &'a [T],
inline: &'static str,
Expand Down
1 change: 1 addition & 0 deletions prqlc/prqlc/src/codegen/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ impl WriteSource for TyTupleField {
}
}

#[derive(Debug, Clone)]
struct UnionVariant<'a>(&'a Option<String>, &'a Ty);

impl WriteSource for UnionVariant<'_> {
Expand Down
60 changes: 60 additions & 0 deletions prqlc/prqlc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,66 @@ pub fn pl_to_prql(pl: &ast::ModuleDef) -> Result<String, ErrorMessages> {
Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap())
}

pub fn format_prql(prql: &str) -> Result<String, ErrorMessages> {
// TODO: convert errors
let tokens = prqlc_parser::lex_source(prql).unwrap();
let pl = prql_to_pl(prql)?;
Ok(codegen::WriteSource::write(
&pl.stmts,
codegen::WriteOpt {
tokens,
..Default::default()
},
)
.unwrap())
}
#[test]
fn test_format_comment_basic() {
use insta::assert_snapshot;
assert_snapshot!(format_prql( r#"
from db.employees # inline comment
"#
).unwrap(), @r###"
from db.employees # inline comment

"###);
}

#[test]
fn test_format_prql() {
use insta::assert_snapshot;

assert_snapshot!(format_prql( "from db.employees | select {name, age}").unwrap(), @r###"
from db.employees
select {name, age}
"###);

assert_snapshot!(format_prql( r#"
from employees
# test comment
select {name}
"#
).unwrap(), @r###"
from employees
# test comment

select {name}

"###);

assert_snapshot!(format_prql( r#"
# test comment
from employees # inline comment
# another test comment
select {name}"#
).unwrap(), @r###"
from employees # inline comment
# another test comment

select {name}
"###);
Comment on lines +392 to +415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, this is got me excited about this feature!

}

/// JSON serialization and deserialization functions
pub mod json {
use super::*;
Expand Down
Loading