Skip to content

Commit

Permalink
Fix parsing of link destinations that look like code or <html>
Browse files Browse the repository at this point in the history
Fixes #136

This works by re-parsing the tokens that come after the link,
but only when the end delimiter isn't on a chunk boundary
(since that's the only way this problem can happen).

Re-parsing a specific chunk won't work, because the part that
needs re-interpreted can span more than one chunk. For example,
we can draw the bounds of the erroneous code chunk in this example:

    [x](`) <a href="`">
        ^-----------^

If we re-parse the underlined part in isolation, we'll fix the
first link, but won't find the HTML (since the closing angle
bracket is in the next chunk).

On the other hand, parsing links, code, and HTML in a single pass
would make writing extensions more complicated. For example,
LaTeX math is supposed to have the same binding strength as
code spans:

    $first[$](about)
    ^------^ this is a math span, not a link

    [first]($)$5/8$
            ^-^ this is an analogue of the original bug
                it shouldn't be a math span, but looks like one
  • Loading branch information
notriddle committed Jan 27, 2024
1 parent 4f79ac4 commit f8b163e
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 24 deletions.
54 changes: 30 additions & 24 deletions commonmark/src/Commonmark/Inlines.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ import Data.List (foldl')
import Unicode.Char (isAscii, isAlpha)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.Map.Strict as M
import Data.Maybe (isJust, mapMaybe, listToMaybe)
import Data.Maybe (isJust, mapMaybe, listToMaybe, maybeToList)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
Expand All @@ -73,18 +73,22 @@ mkInlineParser :: (Monad m, IsInline a)
mkInlineParser bracketedSpecs formattingSpecs ilParsers attrParsers rm toks = do
let iswhite t = hasType Spaces t || hasType LineEnd t
let attrParser = choice attrParsers
let toks' = dropWhile iswhite . reverse . dropWhile iswhite . reverse $ toks
res <- {-# SCC parseChunks #-} evalStateT
(parseChunks bracketedSpecs formattingSpecs ilParsers
attrParser rm toks') defaultEnders
let go chunks toks' bottoms = do
chunks' <- {-# SCC parseChunks #-} evalStateT
(parseChunks bracketedSpecs formattingSpecs ilParsers
attrParser rm toks') defaultEnders
case chunks' of
Left err -> return $ Left err
Right chunks'' ->
case (processBrackets bracketedSpecs rm (chunks ++ chunks'') bottoms) of
Left st -> go ((reverse . befores . rightCursor) st) (mconcat (map chunkToks $ (maybeToList . center $ rightCursor st) ++ (afters $ rightCursor st))) (stackBottoms st)
Right chunks''' -> return $ Right chunks'''
res <- go [] ((dropWhile iswhite . reverse . dropWhile iswhite . reverse) toks) mempty
return $!
case res of
Left err -> Left err
Right chunks ->
(Right .
unChunks .
processEmphasis .
processBrackets bracketedSpecs rm) chunks
case res of
Left err -> Left err
Right chunks ->
(Right . unChunks . processEmphasis) chunks

defaultInlineParser :: (Monad m, IsInline a) => InlineParser m a
defaultInlineParser =
Expand Down Expand Up @@ -695,19 +699,19 @@ bracketMatchedCount :: [Chunk a] -> Int
bracketMatchedCount chunksinside = sum $ map bracketChunkToNumber chunksinside

processBrackets :: IsInline a
=> [BracketedSpec a] -> ReferenceMap -> [Chunk a] -> [Chunk a]
processBrackets bracketedSpecs rm xs =
=> [BracketedSpec a] -> ReferenceMap -> [Chunk a] -> M.Map Text SourcePos -> Either (DState a) [Chunk a]
processBrackets bracketedSpecs rm xs bottoms =
case break (\case
(Chunk Delim{ delimType = '[' } _ _) -> True
_ -> False) xs of
(_,[]) -> xs
(_,[]) -> Right xs
(ys,z:zs) ->
let startcursor = Cursor (Just z) (reverse ys) zs
in processBs bracketedSpecs
DState{ leftCursor = startcursor
, rightCursor = startcursor
, refmap = rm
, stackBottoms = mempty
, stackBottoms = bottoms
, absoluteBottom = chunkPos z
}

Expand All @@ -733,15 +737,15 @@ moveRight (Cursor (Just x) zs (y:ys)) = Cursor (Just y) (x:zs) ys
{-# INLINE moveRight #-}

processBs :: IsInline a
=> [BracketedSpec a] -> DState a -> [Chunk a]
=> [BracketedSpec a] -> DState a -> Either (DState a) [Chunk a]
processBs bracketedSpecs st =
let left = leftCursor st
right = rightCursor st
bottoms = stackBottoms st
bottom = absoluteBottom st
-- trace (prettyCursors left right) $ return $! ()
in {-# SCC processBs #-} case (center left, center right) of
(_, Nothing) -> reverse $
(_, Nothing) -> Right $ reverse $
case center (rightCursor st) of
Nothing -> befores (rightCursor st)
Just c -> c : befores (rightCursor st)
Expand Down Expand Up @@ -828,8 +832,8 @@ processBs bracketedSpecs st =
firstAfterTokPos = tokPos <$> listToMaybe
(concatMap chunkToks afterchunks)
-- in the event that newpos is not at the
-- beginning of a chunk, we need to add
-- some tokens from that chunk...
-- beginning of a chunk, we need to re-chunk
-- with those tokens and everything after them
missingtoks =
[t | t <- suffixToks
, tokPos t >= newpos
Expand All @@ -842,13 +846,12 @@ processBs bracketedSpecs st =
(str (untokenize missingtoks))))
newpos missingtoks :)

in case addMissing afterchunks of
[] -> processBs bracketedSpecs
st{ rightCursor = Cursor Nothing
st' = case addMissing afterchunks of
[] -> st{ rightCursor = Cursor Nothing
(eltchunk : befores left') [] }
(y:ys) ->
let lbs = befores left'
in processBs bracketedSpecs st{
in st{
leftCursor =
Cursor (Just eltchunk) lbs (y:ys)
, rightCursor = fixSingleQuote $
Expand All @@ -862,6 +865,9 @@ processBs bracketedSpecs st =
(chunkPos opener)
$ stackBottoms st
}
in if null missingtoks
then processBs bracketedSpecs st'
else Left st'
-- Bracket matched count /= 0
--
-- Links § 6.3 ¶ 2 • 2
Expand Down
32 changes: 32 additions & 0 deletions commonmark/test/regression.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,3 +336,35 @@ Test <span> x <span>
<p>Test <span> <span></p>
<p>Test <span> x <span></p>
````````````````````````````````


Issue #136
```````````````````````````````` example
[link](`) `x`
.
<p><a href="%60">link</a> <code>x</code></p>
````````````````````````````````

```````````````````````````````` example
[link](`)[link](`) `x`
.
<p><a href="%60">link</a><a href="%60">link</a> <code>x</code></p>
````````````````````````````````

```````````````````````````````` example
[link](<foo bar=">)">) `x`
.
<p><a href="foo%20bar=%22">link</a>&quot;&gt;) <code>x</code></p>
````````````````````````````````

```````````````````````````````` example
[![image](<foo bar=">)">)![image](<foo bar=">)">)](v) `x`
.
<p><a href="v"><img src="foo%20bar=%22" alt="image" />&quot;&gt;)<img src="foo%20bar=%22" alt="image" />&quot;&gt;)</a> <code>x</code></p>
````````````````````````````````

```````````````````````````````` example
[x](`) <a href="`">
.
<p><a href="%60">x</a> <a href="`"></p>
````````````````````````````````
Expand Down

0 comments on commit f8b163e

Please sign in to comment.