From 2473cb74c337802ed65299f94364cd6ebe4a6789 Mon Sep 17 00:00:00 2001 From: Andreas Kollegger Date: Thu, 10 Dec 2020 22:27:43 +0100 Subject: [PATCH] feat(parse) more tolerant of whitespace --- packages/gram-builder/src/index.ts | 21 -- packages/gram-parse/src/gram-grammar.ts | 206 ++++++++++-------- packages/gram-parse/src/gram.ne | 123 ++++++----- packages/gram-parse/test/grammar.test.ts | 8 - packages/gram-parse/test/parse-nodes.test.ts | 5 + ....test.ts => parse-path-annotation.test.ts} | 113 ++++++---- ...test.ts => parse-path-expressions.test.ts} | 2 +- .../gram-parse/test/parse-path-points.test.ts | 57 +++++ 8 files changed, 314 insertions(+), 221 deletions(-) rename packages/gram-parse/test/{parse-decorator.test.ts => parse-path-annotation.test.ts} (50%) rename packages/gram-parse/test/{parse-compose.test.ts => parse-path-expressions.test.ts} (94%) create mode 100644 packages/gram-parse/test/parse-path-points.test.ts diff --git a/packages/gram-builder/src/index.ts b/packages/gram-builder/src/index.ts index 4929db2..2910e41 100644 --- a/packages/gram-builder/src/index.ts +++ b/packages/gram-builder/src/index.ts @@ -223,27 +223,6 @@ export const edge = ( children, }); -/** - * Build a path - * - * @param children - * @param id - * @param labels - * @param record - */ -// export const path = ( -// members: [GramPath] | [GramPath, GramPath], -// id?: string, -// labels?: string[], -// record?: GramRecord -// ): GramPath => ({ -// type: 'path', -// id, -// ...(labels && { labels }), -// ...(record && { record }), -// children: members, -// }); - /** * Build a pair * diff --git a/packages/gram-parse/src/gram-grammar.ts b/packages/gram-parse/src/gram-grammar.ts index fe26de4..ad21155 100644 --- a/packages/gram-parse/src/gram-grammar.ts +++ b/packages/gram-parse/src/gram-grammar.ts @@ -75,15 +75,17 @@ const empty = () => null; const text = ([token]: Array): string => token.text; -function extractPairs(pairGroups: Array) { - return pairGroups.map((pairGroup: Array) => { - return pairGroup[3]; - }); -} +/* +# function extractPairs(pairGroups:Array) { +# return pairGroups.map((pairGroup:Array) => { +# return pairGroup[3]; +# }) +# } -function extractArray(valueGroups: Array): Array { - return valueGroups.map(valueGroup => valueGroup[3]); -} +# function extractArray(valueGroups:Array):Array { +# return valueGroups.map( (valueGroup) => valueGroup[3]); +# } +*/ function separateTagFromString(taggedStringValue: string) { let valueParts = taggedStringValue.match(/([^`]+)`(.+)`$/); @@ -140,13 +142,13 @@ const grammar: Grammar = { ParserRules: [ { name: 'GramSeq$ebnf$1$subexpression$1', - symbols: ['Path', '_'], + symbols: ['Path'], postprocess: ([pp]) => pp, }, { name: 'GramSeq$ebnf$1', symbols: ['GramSeq$ebnf$1$subexpression$1'] }, { name: 'GramSeq$ebnf$1$subexpression$2', - symbols: ['Path', '_'], + symbols: ['Path'], postprocess: ([pp]) => pp, }, { @@ -154,12 +156,10 @@ const grammar: Grammar = { symbols: ['GramSeq$ebnf$1', 'GramSeq$ebnf$1$subexpression$2'], postprocess: d => d[0].concat([d[1]]), }, - { name: 'GramSeq$ebnf$2', symbols: ['EOL'], postprocess: id }, - { name: 'GramSeq$ebnf$2', symbols: [], postprocess: () => null }, { name: 'GramSeq', - symbols: ['GramSeq$ebnf$1', 'GramSeq$ebnf$2'], - postprocess: ([pp]) => g.seq(g.flatten(pp)), + symbols: ['_', 'GramSeq$ebnf$1'], + postprocess: ([, pp]) => g.seq(g.flatten(pp)), }, { name: 'Path', symbols: ['NodePattern'], postprocess: id }, { name: 'Path', symbols: ['PathComposition'], postprocess: id }, @@ -167,8 +167,8 @@ const grammar: Grammar = { { name: 'NodePattern', symbols: ['Node', '_', 'Edge', '_', 'NodePattern'], - postprocess: ([np, , es, , ep]) => - g.cons([np, ep], { + postprocess: ([n, , es, , np]) => + g.cons([n, np], { kind: es.kind, id: es.id, labels: es.labels, @@ -176,65 +176,85 @@ const grammar: Grammar = { }), }, { name: 'NodePattern', symbols: ['Node'], postprocess: id }, + { name: 'Node$ebnf$1', symbols: ['Attributes'], postprocess: id }, + { name: 'Node$ebnf$1', symbols: [], postprocess: () => null }, { name: 'Node', - symbols: [{ literal: '(' }, '_', 'Attributes', '_', { literal: ')' }], + symbols: [{ literal: '(' }, '_', 'Node$ebnf$1', { literal: ')' }, '_'], postprocess: ([, , attrs]) => - g.node(attrs.id, attrs.labels, attrs.record), + attrs ? g.node(attrs.id, attrs.labels, attrs.record) : g.node(), }, + { name: 'Edge$ebnf$1', symbols: ['Attributes'], postprocess: id }, + { name: 'Edge$ebnf$1', symbols: [], postprocess: () => null }, { name: 'Edge', - symbols: [{ literal: '-[' }, '_', 'Attributes', { literal: ']->' }], + symbols: [{ literal: '-[' }, '_', 'Edge$ebnf$1', { literal: ']->' }, '_'], postprocess: ([, , attrs]) => ({ kind: 'right', ...attrs }), }, + { name: 'Edge$ebnf$2', symbols: ['Attributes'], postprocess: id }, + { name: 'Edge$ebnf$2', symbols: [], postprocess: () => null }, { name: 'Edge', - symbols: [{ literal: '-[' }, '_', 'Attributes', { literal: ']-' }], + symbols: [{ literal: '-[' }, '_', 'Edge$ebnf$2', { literal: ']-' }, '_'], postprocess: ([, , attrs]) => ({ kind: 'either', ...attrs }), }, + { name: 'Edge$ebnf$3', symbols: ['Attributes'], postprocess: id }, + { name: 'Edge$ebnf$3', symbols: [], postprocess: () => null }, { name: 'Edge', - symbols: [{ literal: '<-[' }, '_', 'Attributes', { literal: ']-' }], + symbols: [{ literal: '<-[' }, '_', 'Edge$ebnf$3', { literal: ']-' }, '_'], postprocess: ([, , attrs]) => ({ kind: 'left', ...attrs }), }, { name: 'Edge', - symbols: [{ literal: '-[]->' }], + symbols: [{ literal: '-[]->' }, '_'], postprocess: () => ({ kind: 'right' }), }, { name: 'Edge', - symbols: [{ literal: '-[]-' }], + symbols: [{ literal: '-[]-' }, '_'], postprocess: () => ({ kind: 'either' }), }, { name: 'Edge', - symbols: [{ literal: '<-[]-' }], + symbols: [{ literal: '<-[]-' }, '_'], postprocess: () => ({ kind: 'left' }), }, { name: 'Edge', - symbols: [{ literal: '-->' }], + symbols: [{ literal: '-->' }, '_'], postprocess: () => ({ kind: 'right' }), }, { name: 'Edge', - symbols: [{ literal: '--' }], + symbols: [{ literal: '--' }, '_'], postprocess: () => ({ kind: 'either' }), }, { name: 'Edge', - symbols: [{ literal: '<--' }], + symbols: [{ literal: '<--' }, '_'], postprocess: () => ({ kind: 'left' }), }, { name: 'PathComposition', symbols: ['PathPoint'], postprocess: id }, { name: 'PathComposition', symbols: ['PathAnnotation'], postprocess: id }, { name: 'PathComposition', symbols: ['PathExpression'], postprocess: id }, + { name: 'PathPoint$ebnf$1', symbols: ['Attributes'], postprocess: id }, + { name: 'PathPoint$ebnf$1', symbols: [], postprocess: () => null }, { name: 'PathPoint', - symbols: [{ literal: '[' }, '_', 'Attributes', '_', { literal: ']' }], + symbols: [ + { literal: '[' }, + '_', + 'PathPoint$ebnf$1', + { literal: ']' }, + '_', + ], postprocess: ([, , attr]) => { - if ((attr.id || attr.labels || attr.record) && attr.id !== 'ø') { + if ( + attr && + (attr.id || attr.labels || attr.record) && + attr.id !== 'ø' + ) { // console.log(attr); return g.node(attr.id, attr.labels, attr.record); } else { @@ -242,43 +262,43 @@ const grammar: Grammar = { } }, }, + { name: 'PathAnnotation$ebnf$1', symbols: ['Attributes'], postprocess: id }, + { name: 'PathAnnotation$ebnf$1', symbols: [], postprocess: () => null }, { name: 'PathAnnotation', symbols: [ { literal: '[' }, '_', - 'Attributes', - '_', + 'PathAnnotation$ebnf$1', 'Path', { literal: ']' }, + '_', ], - postprocess: ([, , attr, , lhs]) => { + postprocess: ([, , attr, lhs]) => { // console.log('annotate()', lhs) - return g.cons([lhs], { - id: attr.id, - labels: attr.labels, - record: attr.record, - }); + return g.cons( + [lhs], + attr ? { id: attr.id, labels: attr.labels, record: attr.record } : {} + ); }, }, - { name: 'PathExpression$ebnf$1', symbols: ['Kind'], postprocess: id }, + { name: 'PathExpression$ebnf$1', symbols: ['Attributes'], postprocess: id }, { name: 'PathExpression$ebnf$1', symbols: [], postprocess: () => null }, + { name: 'PathExpression$ebnf$2', symbols: ['Kind'], postprocess: id }, + { name: 'PathExpression$ebnf$2', symbols: [], postprocess: () => null }, { name: 'PathExpression', symbols: [ { literal: '[' }, '_', - 'Attributes', - '_', 'PathExpression$ebnf$1', - '_', + 'PathExpression$ebnf$2', 'Path', - '_', 'Path', - '_', { literal: ']' }, + '_', ], - postprocess: ([, , attrs, , kind, , lhs, , rhs]) => { + postprocess: ([, , attrs, kind, lhs, rhs]) => { return g.cons([lhs, rhs], { kind, id: attrs.id, @@ -291,41 +311,44 @@ const grammar: Grammar = { { name: 'PathPair$subexpression$1', symbols: ['PathComposition'] }, { name: 'PathPair', - symbols: ['PathPair$subexpression$1', '_', { literal: ',' }, '_', 'Path'], - postprocess: ([lp, , , , rp]) => g.pair([lp[0], rp]), + symbols: ['PathPair$subexpression$1', { literal: ',' }, '_', 'Path'], + postprocess: ([lp, , , rp]) => g.pair([lp[0], rp]), }, - { name: 'Kind', symbols: [{ literal: ',' }], postprocess: () => 'pair' }, - { name: 'Kind', symbols: [{ literal: '-->' }], postprocess: () => 'right' }, - { name: 'Kind', symbols: [{ literal: '--' }], postprocess: () => 'either' }, - { name: 'Kind', symbols: [{ literal: '<--' }], postprocess: () => 'left' }, - { name: 'Attributes$ebnf$1', symbols: ['Identity'], postprocess: id }, - { name: 'Attributes$ebnf$1', symbols: [], postprocess: () => null }, { - name: 'Attributes$ebnf$2$subexpression$1', - symbols: ['_', 'LabelList'], - postprocess: ([, ll]) => ll, + name: 'Kind', + symbols: [{ literal: ',' }, '_'], + postprocess: () => 'pair', }, { - name: 'Attributes$ebnf$2', - symbols: ['Attributes$ebnf$2$subexpression$1'], - postprocess: id, + name: 'Kind', + symbols: [{ literal: '-->' }, '_'], + postprocess: () => 'right', }, - { name: 'Attributes$ebnf$2', symbols: [], postprocess: () => null }, { - name: 'Attributes$ebnf$3$subexpression$1', - symbols: ['_', 'Record'], - postprocess: ([, r]) => r, + name: 'Kind', + symbols: [{ literal: '--' }, '_'], + postprocess: () => 'either', }, { - name: 'Attributes$ebnf$3', - symbols: ['Attributes$ebnf$3$subexpression$1'], - postprocess: id, + name: 'Kind', + symbols: [{ literal: '<--' }, '_'], + postprocess: () => 'left', }, + { name: 'Attributes$ebnf$1', symbols: ['Identity'], postprocess: id }, + { name: 'Attributes$ebnf$1', symbols: [], postprocess: () => null }, + { name: 'Attributes$ebnf$2', symbols: ['LabelList'], postprocess: id }, + { name: 'Attributes$ebnf$2', symbols: [], postprocess: () => null }, + { name: 'Attributes$ebnf$3', symbols: ['Record'], postprocess: id }, { name: 'Attributes$ebnf$3', symbols: [], postprocess: () => null }, { name: 'Attributes', symbols: ['Attributes$ebnf$1', 'Attributes$ebnf$2', 'Attributes$ebnf$3'], - postprocess: ([id, labels, record]) => ({ id, labels, record }), + postprocess: function(d, _, reject) { + const [id, labels, record] = d; + if (id || labels || record) { + return { id, labels, record }; + } else return reject; + }, }, { name: 'LabelList$ebnf$1', symbols: ['Label'] }, { @@ -345,29 +368,33 @@ const grammar: Grammar = { }, { name: 'Identity', - symbols: [lexer.has('identifier') ? { type: 'identifier' } : identifier], + symbols: [ + lexer.has('identifier') ? { type: 'identifier' } : identifier, + '_', + ], postprocess: text, }, - { name: 'Identity', symbols: [{ literal: 'ø' }], postprocess: text }, + { name: 'Identity', symbols: [{ literal: 'ø' }, '_'], postprocess: text }, { name: 'Identity', - symbols: [lexer.has('symbol') ? { type: 'symbol' } : symbol], + symbols: [lexer.has('symbol') ? { type: 'symbol' } : symbol, '_'], postprocess: text, }, { name: 'Identity', - symbols: [lexer.has('integer') ? { type: 'integer' } : integer], + symbols: [lexer.has('integer') ? { type: 'integer' } : integer, '_'], postprocess: text, }, { name: 'Identity', - symbols: [lexer.has('octal') ? { type: 'octal' } : octal], + symbols: [lexer.has('octal') ? { type: 'octal' } : octal, '_'], postprocess: text, }, { name: 'Identity', symbols: [ lexer.has('hexadecimal') ? { type: 'hexadecimal' } : hexadecimal, + '_', ], postprocess: text, }, @@ -375,6 +402,7 @@ const grammar: Grammar = { name: 'Identity', symbols: [ lexer.has('measurement') ? { type: 'measurement' } : measurement, + '_', ], postprocess: text, }, @@ -382,30 +410,33 @@ const grammar: Grammar = { name: 'Identity', symbols: [ lexer.has('tickedString') ? { type: 'tickedString' } : tickedString, + '_', ], postprocess: ([t]) => t.text.slice(1, -1), }, { name: 'Symbol', - symbols: [lexer.has('symbol') ? { type: 'symbol' } : symbol], + symbols: [lexer.has('symbol') ? { type: 'symbol' } : symbol, '_'], postprocess: text, }, { name: 'Symbol', symbols: [ lexer.has('tickedString') ? { type: 'tickedString' } : tickedString, + '_', ], postprocess: ([t]) => t.text.slice(1, -1), }, { name: 'Record', - symbols: [{ literal: '{' }, '_', { literal: '}' }], + symbols: [{ literal: '{' }, '_', { literal: '}' }, '_'], postprocess: empty, }, { name: 'Record$ebnf$1', symbols: [] }, { name: 'Record$ebnf$1$subexpression$1', - symbols: ['_', { literal: ',' }, '_', 'Property'], + symbols: [{ literal: ',' }, '_', 'Property'], + postprocess: ([, , p]) => p, }, { name: 'Record$ebnf$1', @@ -419,27 +450,28 @@ const grammar: Grammar = { '_', 'Property', 'Record$ebnf$1', - '_', { literal: '}' }, + '_', ], - postprocess: ([, , p, ps]) => [p, ...extractPairs(ps)], + postprocess: ([, , p, ps]) => [p, ...ps], }, { name: 'Property', - symbols: ['Symbol', '_', { literal: ':' }, '_', 'Value'], - postprocess: ([k, , , , v]) => g.property(k, v), + symbols: ['Symbol', { literal: ':' }, '_', 'Value'], + postprocess: ([k, , , v]) => g.property(k, v), }, - { name: 'Value', symbols: ['StringLiteral'], postprocess: id }, - { name: 'Value', symbols: ['NumericLiteral'], postprocess: id }, + { name: 'Value', symbols: ['StringLiteral', '_'], postprocess: id }, + { name: 'Value', symbols: ['NumericLiteral', '_'], postprocess: id }, { name: 'Value', - symbols: [lexer.has('boolean') ? { type: 'boolean' } : boolean], + symbols: [lexer.has('boolean') ? { type: 'boolean' } : boolean, '_'], postprocess: d => g.boolean(JSON.parse(d[0].value.toLowerCase())), }, { name: 'Value$ebnf$1', symbols: [] }, { name: 'Value$ebnf$1$subexpression$1', - symbols: ['_', { literal: ',' }, '_', 'Value'], + symbols: [{ literal: ',' }, '_', 'Value'], + postprocess: ([, , v]) => v, }, { name: 'Value$ebnf$1', @@ -454,8 +486,9 @@ const grammar: Grammar = { 'Value', 'Value$ebnf$1', { literal: ']' }, + '_', ], - postprocess: ([, , v, vs]) => [v, ...extractArray(vs)], + postprocess: ([, , v, vs]) => [v, ...vs], }, { name: 'StringLiteral', @@ -524,12 +557,13 @@ const grammar: Grammar = { return g.measurement(parts.unit, parts.value); }, }, - { name: '_', symbols: [] }, { - name: '_', + name: '_$ebnf$1', symbols: [lexer.has('whitespace') ? { type: 'whitespace' } : whitespace], - postprocess: empty, + postprocess: id, }, + { name: '_$ebnf$1', symbols: [], postprocess: () => null }, + { name: '_', symbols: ['_$ebnf$1'], postprocess: empty }, { name: 'Comment', symbols: [ diff --git a/packages/gram-parse/src/gram.ne b/packages/gram-parse/src/gram.ne index 9503d9c..8929034 100644 --- a/packages/gram-parse/src/gram.ne +++ b/packages/gram-parse/src/gram.ne @@ -50,7 +50,7 @@ let lexer = moo.compile({ # Gram -> (Path | Comment):* # GramSeq is a sequence of paths -GramSeq -> (Path _ {% ([pp]) => pp %}):+ EOL:? {% ([pp]) => g.seq( g.flatten(pp) ) %} +GramSeq -> _ (Path {% ([pp]) => pp %}):+ {% ([,pp]) => g.seq( g.flatten(pp) ) %} # Paths are a generalization of nodes and edges Path -> @@ -61,36 +61,36 @@ Path -> # NodePattern is cypher-like (node1)-[edge]->(node2) NodePattern -> Node _ Edge _ NodePattern - {% ([np,,es,,ep]) => g.cons([np,ep], {kind:es.kind, id:es.id, labels:es.labels, record:es.record} ) %} + {% ([n,,es,,np]) => g.cons([n,np], {kind:es.kind, id:es.id, labels:es.labels, record:es.record} ) %} | Node {% id %} Node -> - "(" _ Attributes _ ")" - {% ([,,attrs]) => g.node(attrs.id, attrs.labels, attrs.record) %} + "(" _ Attributes:? ")" _ + {% ([,,attrs]) => attrs ? g.node(attrs.id, attrs.labels, attrs.record) : g.node() %} Edge -> - "-[" _ Attributes "]->" + "-[" _ Attributes:? "]->" _ {% ([,,attrs]) => ({kind:'right', ...attrs}) %} - | "-[" _ Attributes "]-" + | "-[" _ Attributes:? "]-" _ {% ([,,attrs]) => ({kind:'either', ...attrs}) %} - | "<-[" _ Attributes "]-" + | "<-[" _ Attributes:? "]-" _ {% ([,,attrs]) => ({kind:'left', ...attrs}) %} - | "-[]->" {% () => ({kind:'right'}) %} - | "-[]-" {% () => ({kind:'either'}) %} - | "<-[]-" {% () => ({kind:'left'}) %} - | "-->" {% () => ({kind:'right'}) %} - | "--" {% () => ({kind:'either'}) %} - | "<--" {% () => ({kind:'left'}) %} + | "-[]->" _ {% () => ({kind:'right'}) %} + | "-[]-" _ {% () => ({kind:'either'}) %} + | "<-[]-" _ {% () => ({kind:'left'}) %} + | "-->" _ {% () => ({kind:'right'}) %} + | "--" _ {% () => ({kind:'either'}) %} + | "<--" _ {% () => ({kind:'left'}) %} PathComposition -> - PathPoint {% id %} + PathPoint {% id %} | PathAnnotation {% id %} | PathExpression {% id %} PathPoint -> - "[" _ Attributes _ "]" + "[" _ Attributes:? "]" _ {% ([,,attr]) => { - if ( (attr.id || attr.labels || attr.record) && attr.id !== 'ø' ) { + if ( attr && (attr.id || attr.labels || attr.record) && attr.id !== 'ø' ) { // console.log(attr); return g.node(attr.id, attr.labels, attr.record) } else { @@ -100,33 +100,38 @@ PathPoint -> %} PathAnnotation -> - "[" _ Attributes _ Path "]" - {% ([,,attr,,lhs]) => { + "[" _ Attributes:? Path "]" _ + {% ([,,attr,lhs]) => { // console.log('annotate()', lhs) - return g.cons( [lhs], {id:attr.id, labels:attr.labels, record:attr.record}) + return g.cons( [lhs], attr ? {id:attr.id, labels:attr.labels, record:attr.record} : {}) } %} PathExpression -> - "[" _ Attributes _ Kind:? _ Path _ Path _ "]" - # with both optional, rhs will match first - {% ([,,attrs,,kind,,lhs,,rhs]) => { + "[" _ Attributes:? Kind:? Path Path "]" _ + {% ([,,attrs,kind,lhs,rhs]) => { return g.cons( [lhs,rhs], {kind, id:attrs.id, labels:attrs.labels, record:attrs.record}) } %} PathPair -> - (NodePattern | PathComposition) _ "," _ Path - {% ([lp,,,,rp]) => g.pair([lp[0],rp] ) %} + (NodePattern | PathComposition) "," _ Path + {% ([lp,,,rp]) => g.pair([lp[0],rp] ) %} Kind -> - "," {% () => ('pair') %} - | "-->" {% () => ('right') %} - | "--" {% () => ('either') %} - | "<--" {% () => ('left') %} - -Attributes -> - Identity:? (_ LabelList {% ([,ll]) => ll %}):? (_ Record {% ([,r]) => r %}):? {% ([id,labels,record]) => ( {id, labels, record} ) %} + "," _ {% () => ('pair') %} + | "-->" _ {% () => ('right') %} + | "--" _ {% () => ('either') %} + | "<--" _ {% () => ('left') %} + +Attributes -> Identity:? LabelList:? Record:? + {% function (d,_,reject) { + const [id,labels,record] = d; + if (id || labels || record) { + return {id, labels, record} + } else return reject; + } + %} LabelList -> Label:+ {% ([labels]) => labels %} @@ -134,32 +139,32 @@ LabelList -> Label -> ":" Symbol {% ([,label]) => label %} Identity -> - %identifier {% text %} - | "ø" {% text %} - | %symbol {% text %} - | %integer {% text %} - | %octal {% text %} - | %hexadecimal {% text %} - | %measurement {% text %} - | %tickedString {% ([t]) => t.text.slice(1,-1) %} + %identifier _ {% text %} + | "ø" _ {% text %} + | %symbol _ {% text %} + | %integer _ {% text %} + | %octal _ {% text %} + | %hexadecimal _ {% text %} + | %measurement _ {% text %} + | %tickedString _ {% ([t]) => t.text.slice(1,-1) %} Symbol -> - %symbol {% text %} - | %tickedString {% ([t]) => t.text.slice(1,-1) %} + %symbol _ {% text %} + | %tickedString _ {% ([t]) => t.text.slice(1,-1) %} Record -> - "{" _ "}" {% empty %} - | "{" _ Property (_ "," _ Property):* _ "}" {% ([,,p,ps]) => [p, ...extractPairs(ps)] %} + "{" _ "}" _ {% empty %} + | "{" _ Property ("," _ Property {% ([,,p]) => p %}):* "}" _ {% ([,,p,ps]) => [p, ...ps] %} -Property -> Symbol _ ":" _ Value {% ([k,,,,v]) => g.property(k,v) %} +Property -> Symbol ":" _ Value {% ([k,,,v]) => g.property(k,v) %} # Key -> Symbol {% id %} Value -> - StringLiteral {% id %} - | NumericLiteral {% id %} - | %boolean {% (d) => g.boolean(JSON.parse(d[0].value.toLowerCase())) %} - | "[" _ Value (_ "," _ Value):* "]" {% ([,,v,vs]) => ([v, ...extractArray(vs)]) %} + StringLiteral _ {% id %} + | NumericLiteral _ {% id %} + | %boolean _ {% (d) => g.boolean(JSON.parse(d[0].value.toLowerCase())) %} + | "[" _ Value ("," _ Value {% ([,,v]) => v %}):* "]" _ {% ([,,v,vs]) => ([v, ...vs]) %} StringLiteral -> %singleQuotedString {% (d) => g.string(d[0].value) %} @@ -184,7 +189,7 @@ NumericLiteral -> # # Whitespace and comments # -_ -> null | %whitespace {% empty %} +_ -> %whitespace:? {% empty %} # Comment -> %lineComment [\n]:? {% empty %} Comment -> %lineComment {% empty %} @@ -197,15 +202,17 @@ const empty = () => null; const text =([token]:Array):string => token.text; -function extractPairs(pairGroups:Array) { - return pairGroups.map((pairGroup:Array) => { - return pairGroup[3]; - }) -} - -function extractArray(valueGroups:Array):Array { - return valueGroups.map( (valueGroup) => valueGroup[3]); -} +/* +# function extractPairs(pairGroups:Array) { +# return pairGroups.map((pairGroup:Array) => { +# return pairGroup[3]; +# }) +# } + +# function extractArray(valueGroups:Array):Array { +# return valueGroups.map( (valueGroup) => valueGroup[3]); +# } +*/ function separateTagFromString(taggedStringValue:string) { let valueParts = taggedStringValue.match(/([^`]+)`(.+)`$/); diff --git a/packages/gram-parse/test/grammar.test.ts b/packages/gram-parse/test/grammar.test.ts index 4d48af4..2f4538b 100644 --- a/packages/gram-parse/test/grammar.test.ts +++ b/packages/gram-parse/test/grammar.test.ts @@ -2,14 +2,6 @@ import nearley, { Parser } from 'nearley'; import grammar from '../src/gram-grammar'; -// import { -// EMPTY_PATH_ID, -// isGramNode, -// isGramSeq, -// isGramPath, -// isGramEdge, -// isGramEmptyPath, -// } from '@gram-data/gram-ast'; import { Node } from 'unist'; let DEBUG = true; diff --git a/packages/gram-parse/test/parse-nodes.test.ts b/packages/gram-parse/test/parse-nodes.test.ts index 926b938..591b1a6 100644 --- a/packages/gram-parse/test/parse-nodes.test.ts +++ b/packages/gram-parse/test/parse-nodes.test.ts @@ -181,8 +181,13 @@ describe('parsing nodes', () => { it.each` gram ${'({k:`v`})'} + ${'({k:`v`} )'} + ${'( {k:`v`})'} ${'( {k:`v`} )'} ${'( { k:`v` } )'} + ${'( { k: `v` } )'} + ${'( { k : `v` } )'} + ${'({k: `v`, l: 2})'} `('$gram is tolerant of whitespace', ({ gram }) => { const result = toAST(gram); expect(result).toBeDefined(); diff --git a/packages/gram-parse/test/parse-decorator.test.ts b/packages/gram-parse/test/parse-path-annotation.test.ts similarity index 50% rename from packages/gram-parse/test/parse-decorator.test.ts rename to packages/gram-parse/test/parse-path-annotation.test.ts index 6333924..31f141b 100644 --- a/packages/gram-parse/test/parse-decorator.test.ts +++ b/packages/gram-parse/test/parse-path-annotation.test.ts @@ -1,62 +1,31 @@ +import nearley, { Parser } from 'nearley'; import { toAST } from '../src'; import { - EMPTY_PATH_ID, isGramNode, isGramPath, - isGramEmptyPath, } from '@gram-data/gram-ast'; import { Node } from 'unist'; -let DEBUG = true; +import grammar from '../src/gram-grammar'; + const inspect = require('unist-util-inspect'); // @ts-ignore const show = (ast: Node) => { + const DEBUG = false; if (DEBUG) console.log(inspect(ast)); }; -describe('parsing empty paths', () => { - it('[] as an empty path', () => { - const src = `[]`; - const result = toAST(src); - expect(result).toBeDefined(); - // show(result); - const firstPath = result.children[0]; - expect(isGramEmptyPath(firstPath)).toBeTruthy(); - expect(firstPath.id).toBe(EMPTY_PATH_ID); - }); - it('[ø] as an empty path with explicit, exclusive ID for empty paths', () => { - const src = `[ø]`; - const result = toAST(src); - expect(result).toBeDefined(); - // console.log(inspect(result)); - const firstPath = result.children[0]; - expect(isGramEmptyPath(firstPath)).toBeTruthy(); - expect(firstPath.id).toBe(EMPTY_PATH_ID); - }); - - it('[[]] =~ [ [ø] [ø] ] =~ ()', () => { - const src = `[[]]`; - const result = toAST(src); - expect(result).toBeDefined(); - // show(result); - const firstPath = result.children[0]; - expect(isGramNode(firstPath)).toBeTruthy(); - }); - - it('[[[]]] as a path equivalent to [ [ [ø] [ø] ] [ø] ] =~ [ () [ø] ]', () => { - const src = `[[[]]]`; - const result = toAST(src); - expect(result).toBeDefined(); - // console.log(inspect(result)); - const firstPath = result.children[0]; - expect(isGramPath(firstPath)).toBeTruthy(); - }); -}); +export const rawParse = (text: string): Parser => { + const nearleyParser = new nearley.Parser( + nearley.Grammar.fromCompiled(grammar) + ); + return nearleyParser.feed(text); +}; -describe('parsing nested nodes (implied ø rhs)', () => { - it('[p (n)] as a defined path containing a single node', () => { +describe('parsing path annotation (composition with an implied ø rhs)', () => { + it('[p (n)] ', () => { const pathId = 'p'; const nodeId = 'n'; const src = `[${pathId} (${nodeId})]`; @@ -71,7 +40,7 @@ describe('parsing nested nodes (implied ø rhs)', () => { expect(isGramNode(nestedPath)).toBeTruthy(); }); - it('[p [n]] =~ [p (n) [ø]]', () => { + it('[p [n]]', () => { const pathId = 'p'; const nodeId = 'n'; const src = `[${pathId} [${nodeId}]]`; @@ -86,13 +55,13 @@ describe('parsing nested nodes (implied ø rhs)', () => { expect(isGramNode(nestedPath)).toBeTruthy(); }); - it('[p (n) []] =~ [p (n) [ø]]', () => { + it('[p (n) ] with whitespace before the closing "]"', () => { const pathId = 'p'; const nodeId = 'n'; - const src = `[${pathId} (${nodeId}) []]`; + const src = `[${pathId} (${nodeId}) ]`; const result = toAST(src); expect(result).toBeDefined(); - // console.log(inspect(result)); + // show(result); const firstPath = result.children[0]; expect(isGramPath(firstPath)).toBeTruthy(); expect(firstPath?.id).toBe(pathId); @@ -100,4 +69,54 @@ describe('parsing nested nodes (implied ø rhs)', () => { expect(nestedPath).toBeDefined(); expect(isGramNode(nestedPath)).toBeTruthy(); }); + + it.each` + gram + ${'[()]'} + ${'[() ]'} + ${'[ ()]'} + ${'[p()]'} + ${'[p ()]'} + ${'[ p ()]'} + ${'[:Path()]'} + ${'[:Path ()]'} + ${'[ :Path()]'} + ${'[p:Path()]'} + ${'[p:Path ()]'} + ${'[ p:Path()]'} + ${'[ p :Path ()]'} + ${'[p:Path{k:1}()]'} + ${'[p:Path{k:1} ()]'} + ${'[ p:Path{k:1}()]'} + ${'[ p:Path{k:1} ()]'} + ${'[ p:Path {k:1} ()]'} + ${'[ p :Path {k:1} ()]'} + ${'(),()'} + ${'(), ()'} + ${'(),() '} + `('$gram is unambiguous', ({ gram }) => { + const parsed = rawParse(gram); + // console.dir(parsed.results); + // show(parsed.results[0]) + expect(parsed).toBeDefined(); + expect(parsed.results).toHaveLength(1); + }); + + + it('tolerates linebreaks', () => { + const gram = ` + (), + () () + () + ( + + ) + ` + const parsed = rawParse(gram); + // console.dir(parsed.results); + // show(parsed.results[0]) + expect(parsed).toBeDefined(); + expect(parsed.results).toHaveLength(1); + }); + }); diff --git a/packages/gram-parse/test/parse-compose.test.ts b/packages/gram-parse/test/parse-path-expressions.test.ts similarity index 94% rename from packages/gram-parse/test/parse-compose.test.ts rename to packages/gram-parse/test/parse-path-expressions.test.ts index b4dc58b..3a56497 100644 --- a/packages/gram-parse/test/parse-compose.test.ts +++ b/packages/gram-parse/test/parse-path-expressions.test.ts @@ -11,7 +11,7 @@ const show = (ast: Node) => { if (DEBUG) console.log(inspect(ast)); }; -describe('parsing path composition', () => { +describe('parsing composition by path expression (composing two members)', () => { it('[e -- () ()] ≅ ()-[e]-(), an edge identified as "e"', () => { const edgeId = 'e'; const src = `[${edgeId} -- () ()]`; diff --git a/packages/gram-parse/test/parse-path-points.test.ts b/packages/gram-parse/test/parse-path-points.test.ts new file mode 100644 index 0000000..6e38da9 --- /dev/null +++ b/packages/gram-parse/test/parse-path-points.test.ts @@ -0,0 +1,57 @@ +import { toAST } from '../src'; +import { + EMPTY_PATH_ID, + isGramNode, + isGramPath, + isGramEmptyPath, +} from '@gram-data/gram-ast'; +import { Node } from 'unist'; + +let DEBUG = true; + +const inspect = require('unist-util-inspect'); + +// @ts-ignore +const show = (ast: Node) => { + if (DEBUG) console.log(inspect(ast)); +}; + +describe('parsing empty paths', () => { + it('[] as an empty path', () => { + const src = `[]`; + const result = toAST(src); + expect(result).toBeDefined(); + // show(result); + const firstPath = result.children[0]; + expect(isGramEmptyPath(firstPath)).toBeTruthy(); + expect(firstPath.id).toBe(EMPTY_PATH_ID); + }); + it('[ø] as an empty path with explicit, exclusive ID for empty paths', () => { + const src = `[ø]`; + const result = toAST(src); + expect(result).toBeDefined(); + // console.log(inspect(result)); + const firstPath = result.children[0]; + expect(isGramEmptyPath(firstPath)).toBeTruthy(); + expect(firstPath.id).toBe(EMPTY_PATH_ID); + }); + + it('[[]] =~ [ [ø] [ø] ] =~ ()', () => { + const src = `[[]]`; + const result = toAST(src); + expect(result).toBeDefined(); + // show(result); + const firstPath = result.children[0]; + expect(isGramNode(firstPath)).toBeTruthy(); + }); + + it('[[[]]] as a path equivalent to [ [ [ø] [ø] ] [ø] ] =~ [ () [ø] ]', () => { + const src = `[[[]]]`; + const result = toAST(src); + expect(result).toBeDefined(); + // console.log(inspect(result)); + const firstPath = result.children[0]; + expect(isGramPath(firstPath)).toBeTruthy(); + }); +}); +