From 6e4a912b1196de10c1516ffd6279476f7fb4de81 Mon Sep 17 00:00:00 2001 From: Bernd Ahlers Date: Fri, 31 Mar 2023 10:51:50 +0200 Subject: [PATCH 1/2] Implement a custom parser for <ac:*> tags This replaces the workaround of substituting the colons in <ac:*> tags with a magic string. A custom parser now parses these tags as ast.KindRawHTML. The custom parser is a stripped-down version of goldmark's rawHTMLParser. --- pkg/mark/ac_tag_parser.go | 112 +++++++++++++++++++++++++++ pkg/mark/markdown.go | 22 ++---- pkg/mark/testdata/macro-include.html | 4 +- 3 files changed, 121 insertions(+), 17 deletions(-) create mode 100644 pkg/mark/ac_tag_parser.go diff --git a/pkg/mark/ac_tag_parser.go b/pkg/mark/ac_tag_parser.go new file mode 100644 index 00000000..763a1ca0 --- /dev/null +++ b/pkg/mark/ac_tag_parser.go @@ -0,0 +1,112 @@ +package mark + +import ( + "bytes" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +// NewACTagParser returns an inline parser that parses <ac:*> tags to ensure that Confluence-specific tags are parsed +// as ast.KindRawHTML so they are not escaped at render time. The parser must be registered with a higher priority +// than goldmark's linkParser. Otherwise, the linkParser would parse the tags. +func NewACTagParser() parser.InlineParser { + return &acTagParser{} +} + +var _ parser.InlineParser = (*acTagParser)(nil) + +// acTagParser is a stripped-down version of goldmark's rawHTMLParser. 
+// See: https://github.com/yuin/goldmark/blob/master/parser/raw_html.go +type acTagParser struct { +} + +func (s *acTagParser) Trigger() []byte { + return []byte{'<'} +} + +func (s *acTagParser) Parse(_ ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, _ := block.PeekLine() + if len(line) > 1 && util.IsAlphaNumeric(line[1]) { + return s.parseMultiLineRegexp(openTagRegexp, block, pc) + } + if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) { + return s.parseMultiLineRegexp(closeTagRegexp, block, pc) + } + if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' { + return s.parseUntil(block, closeDecl, pc) + } + if bytes.HasPrefix(line, openCDATA) { + return s.parseUntil(block, closeCDATA, pc) + } + return nil +} + +var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)` + +var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` + +// Only match tags +var openTagRegexp = regexp.MustCompile("^`) +var closeTagRegexp = regexp.MustCompile("^`) + +var openCDATA = []byte("") +var closeDecl = []byte(">") + +func (s *acTagParser) parseUntil(block text.Reader, closer []byte, _ parser.Context) ast.Node { + savedLine, savedSegment := block.Position() + node := ast.NewRawHTML() + for { + line, segment := block.PeekLine() + if line == nil { + break + } + index := bytes.Index(line, closer) + if index > -1 { + node.Segments.Append(segment.WithStop(segment.Start + index + len(closer))) + block.Advance(index + len(closer)) + return node + } + node.Segments.Append(segment) + block.AdvanceLine() + } + block.SetPosition(savedLine, savedSegment) + return nil +} + +func (s *acTagParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, _ parser.Context) ast.Node { + sline, ssegment := block.Position() + if block.Match(reg) { + node := ast.NewRawHTML() + eline, esegment := block.Position() + block.SetPosition(sline, ssegment) + for { + line, segment := 
block.PeekLine() + if line == nil { + break + } + l, _ := block.Position() + start := segment.Start + if l == sline { + start = ssegment.Start + } + end := segment.Stop + if l == eline { + end = esegment.Start + } + + node.Segments.Append(text.NewSegment(start, end)) + if l == eline { + block.Advance(end - start) + break + } else { + block.AdvanceLine() + } + } + return node + } + return nil +} diff --git a/pkg/mark/markdown.go b/pkg/mark/markdown.go index 2ff2b717..3c44ab89 100644 --- a/pkg/mark/markdown.go +++ b/pkg/mark/markdown.go @@ -430,22 +430,9 @@ func (r *ConfluenceRenderer) renderCodeBlock(writer util.BufWriter, source []byt return ast.WalkContinue, nil } -// compileMarkdown will replace tags like with escaped -// equivalent, because goldmark markdown parser replaces that tags with -// ac:rich-text-body because of the autolink -// rule. func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { log.Tracef(nil, "rendering markdown:\n%s", string(markdown)) - colon := []byte("---bf-COLON---") - - tags := regexp.MustCompile(`]+>`) - - for _, match := range tags.FindAll(markdown, -1) { - // Replace the colon in all "" tags with the colon bytes to avoid having Goldmark escape the HTML output. - markdown = bytes.ReplaceAll(markdown, match, bytes.ReplaceAll(match, []byte(":"), colon)) - } - converter := goldmark.New( goldmark.WithExtensions( extension.GFM, @@ -461,6 +448,12 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { html.WithUnsafe(), )) + converter.Parser().AddOptions(parser.WithInlineParsers( + // Must be registered with a higher priority than goldmark's linkParser to make sure goldmark doesn't parse + // the tags. 
+ util.Prioritized(NewACTagParser(), 199), + )) + converter.Renderer().AddOptions(renderer.WithNodeRenderers( util.Prioritized(NewConfluenceRenderer(stdlib), 100), )) @@ -472,8 +465,7 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { panic(err) } - // Restore all the colons we previously replaced. - html := bytes.ReplaceAll(buf.Bytes(), colon, []byte(":")) + html := buf.Bytes() log.Tracef(nil, "rendered markdown to html:\n%s", string(html)) diff --git a/pkg/mark/testdata/macro-include.html b/pkg/mark/testdata/macro-include.html index 55f14962..8d8ba051 100644 --- a/pkg/mark/testdata/macro-include.html +++ b/pkg/mark/testdata/macro-include.html @@ -1,6 +1,6 @@

bar

- +

true Attention This is an info! - \ No newline at end of file +

From 80d906417c230c86e539e3765b36df6a6ae37af8 Mon Sep 17 00:00:00 2001 From: Bernd Ahlers Date: Fri, 31 Mar 2023 11:09:58 +0200 Subject: [PATCH 2/2] Fix custom link renderer and add tests for Confluence links Since we now have a custom parser for <ac:*> tags, the custom link renderer added an additional </a> tag at the end of each internal Confluence link. Add tests for internal links and add an example for internal links with spaces in page titles to the README file. --- README.md | 2 ++ pkg/mark/markdown.go | 3 +-- pkg/mark/testdata/links.html | 4 ++++ pkg/mark/testdata/links.md | 8 ++++++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 78cb49ab..c8f3b8f8 100644 --- a/README.md +++ b/README.md @@ -517,6 +517,8 @@ See task MYJIRA-123. This is a [link to an existing confluence page](ac:Pagetitle) And this is how to link when the linktext is the same as the [Pagetitle](ac:) + +Link to a [page title with space]() ``` ### Add width for an image diff --git a/pkg/mark/markdown.go b/pkg/mark/markdown.go index 3c44ab89..680375d0 100644 --- a/pkg/mark/markdown.go +++ b/pkg/mark/markdown.go @@ -276,9 +276,8 @@ func (r *ConfluenceRenderer) renderLink(writer util.BufWriter, source []byte, no if err != nil { return ast.WalkStop, err } - - return ast.WalkSkipChildren, nil } + return ast.WalkSkipChildren, nil } return r.goldmarkRenderLink(writer, source, node, entering) } diff --git a/pkg/mark/testdata/links.html b/pkg/mark/testdata/links.html index 3b1f468b..625f00ee 100644 --- a/pkg/mark/testdata/links.html +++ b/pkg/mark/testdata/links.html @@ -1,5 +1,9 @@

Use https://example.com

Use aaa

+

Use

+

Use

+

Use

+

Use

Use footnotes link 1


diff --git a/pkg/mark/testdata/links.md b/pkg/mark/testdata/links.md index f8147957..ce4a282d 100644 --- a/pkg/mark/testdata/links.md +++ b/pkg/mark/testdata/links.md @@ -2,5 +2,13 @@ Use Use aaa +Use [page link](ac:Page) + +Use [AnotherPage](ac:) + +Use [Another Page](ac:) + +Use [page link with spaces]() + Use footnotes link [^1] [^1]: a footnote link \ No newline at end of file