diff --git a/README.md b/README.md index 78cb49ab..c8f3b8f8 100644 --- a/README.md +++ b/README.md @@ -517,6 +517,8 @@ See task MYJIRA-123. This is a [link to an existing confluence page](ac:Pagetitle) And this is how to link when the linktext is the same as the [Pagetitle](ac:) + +Link to a [page title with space]() ``` ### Add width for an image diff --git a/pkg/mark/ac_tag_parser.go b/pkg/mark/ac_tag_parser.go new file mode 100644 index 00000000..763a1ca0 --- /dev/null +++ b/pkg/mark/ac_tag_parser.go @@ -0,0 +1,112 @@ +package mark + +import ( + "bytes" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "regexp" +) + +// NewACTagParser returns an inline parser that parses tags to ensure that Confluence specific tags are parsed +// as ast.KindRawHtml so they are not escaped at render time. The parser must be registered with a higher priority +// than goldmark's linkParser. Otherwise, the linkParser would parse the tags. +func NewACTagParser() parser.InlineParser { + return &acTagParser{} +} + +var _ parser.InlineParser = (*acTagParser)(nil) + +// acTagParser is a stripped down version of goldmark's rawHTMLParser. +// See: https://github.com/yuin/goldmark/blob/master/parser/raw_html.go +type acTagParser struct { +} + +func (s *acTagParser) Trigger() []byte { + return []byte{'<'} +} + +func (s *acTagParser) Parse(_ ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, _ := block.PeekLine() + if len(line) > 1 && util.IsAlphaNumeric(line[1]) { + return s.parseMultiLineRegexp(openTagRegexp, block, pc) + } + if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) { + return s.parseMultiLineRegexp(closeTagRegexp, block, pc) + } + if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' { + return s.parseUntil(block, closeDecl, pc) + } + if bytes.HasPrefix(line, openCDATA) { + return s.parseUntil(block, closeCDATA, pc) + } + return nil +} + +var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)` + +var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` + +// Only match tags +var openTagRegexp = regexp.MustCompile("^`) +var closeTagRegexp = regexp.MustCompile("^`) + +var openCDATA = []byte("") +var closeDecl = []byte(">") + +func (s *acTagParser) parseUntil(block text.Reader, closer []byte, _ parser.Context) ast.Node { + savedLine, savedSegment := block.Position() + node := ast.NewRawHTML() + for { + line, segment := block.PeekLine() + if line == nil { + break + } + index := bytes.Index(line, closer) + if index > -1 { + node.Segments.Append(segment.WithStop(segment.Start + index + len(closer))) + block.Advance(index + len(closer)) + return node + } + node.Segments.Append(segment) + block.AdvanceLine() + } + block.SetPosition(savedLine, savedSegment) + return nil +} + +func (s *acTagParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, _ parser.Context) ast.Node { + sline, ssegment := block.Position() + if block.Match(reg) { + node := ast.NewRawHTML() + eline, esegment := block.Position() + block.SetPosition(sline, ssegment) + for { + line, segment := block.PeekLine() + if line == nil { + break + } + l, _ := block.Position() + start := segment.Start + if l == sline { + start = ssegment.Start + } + end := segment.Stop + if l == eline { + end = esegment.Start + } + + node.Segments.Append(text.NewSegment(start, end)) + if l == eline { + block.Advance(end - start) + break + } else { + block.AdvanceLine() + } + } + return node + } + return nil +} diff --git a/pkg/mark/markdown.go b/pkg/mark/markdown.go index 2ff2b717..680375d0 100644 --- a/pkg/mark/markdown.go +++ b/pkg/mark/markdown.go @@ -276,9 +276,8 @@ func (r *ConfluenceRenderer) renderLink(writer util.BufWriter, source []byte, no if err != nil { return ast.WalkStop, err } - - return ast.WalkSkipChildren, nil } + return ast.WalkSkipChildren, nil } return r.goldmarkRenderLink(writer, source, node, entering) } @@ -430,22 +429,9 @@ func (r *ConfluenceRenderer) renderCodeBlock(writer util.BufWriter, source []byt return ast.WalkContinue, nil } -// compileMarkdown will replace tags like with escaped -// equivalent, because goldmark markdown parser replaces that tags with -// ac:rich-text-body because of the autolink -// rule. func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { log.Tracef(nil, "rendering markdown:\n%s", string(markdown)) - colon := []byte("---bf-COLON---") - - tags := regexp.MustCompile(`]+>`) - - for _, match := range tags.FindAll(markdown, -1) { - // Replace the colon in all "" tags with the colon bytes to avoid having Goldmark escape the HTML output. - markdown = bytes.ReplaceAll(markdown, match, bytes.ReplaceAll(match, []byte(":"), colon)) - } - converter := goldmark.New( goldmark.WithExtensions( extension.GFM, @@ -461,6 +447,12 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { html.WithUnsafe(), )) + converter.Parser().AddOptions(parser.WithInlineParsers( + // Must be registered with a higher priority than goldmark's linkParser to make sure goldmark doesn't parse + // the tags. + util.Prioritized(NewACTagParser(), 199), + )) + converter.Renderer().AddOptions(renderer.WithNodeRenderers( util.Prioritized(NewConfluenceRenderer(stdlib), 100), )) @@ -472,8 +464,7 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string { panic(err) } - // Restore all the colons we previously replaced. - html := bytes.ReplaceAll(buf.Bytes(), colon, []byte(":")) + html := buf.Bytes() log.Tracef(nil, "rendered markdown to html:\n%s", string(html)) diff --git a/pkg/mark/testdata/links.html b/pkg/mark/testdata/links.html index 3b1f468b..625f00ee 100644 --- a/pkg/mark/testdata/links.html +++ b/pkg/mark/testdata/links.html @@ -1,5 +1,9 @@

Use https://example.com

Use aaa

+

Use

+

Use

+

Use

+

Use

Use footnotes link 1


diff --git a/pkg/mark/testdata/links.md b/pkg/mark/testdata/links.md index f8147957..ce4a282d 100644 --- a/pkg/mark/testdata/links.md +++ b/pkg/mark/testdata/links.md @@ -2,5 +2,13 @@ Use Use aaa +Use [page link](ac:Page) + +Use [AnotherPage](ac:) + +Use [Another Page](ac:) + +Use [page link with spaces]() + Use footnotes link [^1] [^1]: a footnote link \ No newline at end of file diff --git a/pkg/mark/testdata/macro-include.html b/pkg/mark/testdata/macro-include.html index 55f14962..8d8ba051 100644 --- a/pkg/mark/testdata/macro-include.html +++ b/pkg/mark/testdata/macro-include.html @@ -1,6 +1,6 @@

bar

- +

true Attention This is an info! - \ No newline at end of file +