markedjs · UziTech · Apr 20, 2024 · Apr 21, 2024 · Apr 22, 2024 · Apr 26, 2024
diff --git a/src/Lexer.ts b/src/Lexer.ts
@@ -101,9 +101,9 @@ export class _Lexer {
  /**
  * Lexing
  */
- blockTokens(src: string, tokens?: Token[]): Token[];
- blockTokens(src: string, tokens?: TokensList): TokensList;
- blockTokens(src: string, tokens: Token[] = []) {
+ blockTokens(src: string, tokens?: Token[], lastParagraphClipped?: boolean): Token[];
+ blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
+ blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
  if (this.options.pedantic) {
  src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
  } else {
@@ -115,7 +115,6 @@ export class _Lexer {
  let token: Tokens.Generic | undefined;
  let lastToken;
  let cutSrc;
- let lastParagraphClipped;
 
  while (src) {
  if (this.options.extensions
@@ -249,7 +248,7 @@ export class _Lexer {
  }
  if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
  lastToken = tokens[tokens.length - 1];
- if (lastParagraphClipped && lastToken.type === 'paragraph') {
+ if (lastParagraphClipped && lastToken?.type === 'paragraph') {
  lastToken.raw += '\n' + token.raw;
  lastToken.text += '\n' + token.text;
  this.inlineQueue.pop();

diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
@@ -7,7 +7,7 @@ import {
 } from './helpers.ts';
 import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
-import type { Links, Tokens } from './Tokens.ts';
+import type { Links, Tokens, Token } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
 
 function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image {
@@ -148,24 +148,89 @@ export class _Tokenizer {
  if (cap) {
  return {
  type: 'hr',
- raw: cap[0]
+ raw: rtrim(cap[0], '\n')
  };
  }
  }
 
  blockquote(src: string): Tokens.Blockquote | undefined {
  const cap = this.rules.block.blockquote.exec(src);
  if (cap) {
- // precede setext continuation with 4 spaces so it isn't a setext
- let text = cap[0].replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1');
- text = rtrim(text.replace(/^ *>[ \t]?/gm, ''), '\n');
- const top = this.lexer.state.top;
- this.lexer.state.top = true;
- const tokens = this.lexer.blockTokens(text);
- this.lexer.state.top = top;
+ let lines = rtrim(cap[0], '\n').split('\n');
+ let raw = '';
+ let text = '';
+ const tokens: Token[] = [];
+
+ while (lines.length > 0) {
+ let inBlockquote = false;
+ const currentLines = [];
+
+ let i;
+ for (i = 0; i < lines.length; i++) {
+ // get lines up to a continuation
+ if (/^ {0,3}>/.test(lines[i])) {
+ currentLines.push(lines[i]);
+ inBlockquote = true;
+ } else if (!inBlockquote) {
+ currentLines.push(lines[i]);
+ } else {
+ break;
+ }
+ }
+ lines = lines.slice(i);
+
+ const currentRaw = currentLines.join('\n');
+ const currentText = currentRaw
+ // precede setext continuation with 4 spaces so it isn't a setext
+ .replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1')
+ .replace(/^ {0,3}>[ \t]?/gm, '');
+ raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
+ text = text ? `${text}\n${currentText}` : currentText;
+
+ // parse blockquote lines as top level tokens
+ // merge paragraphs if this is a continuation
+ const top = this.lexer.state.top;
+ this.lexer.state.top = true;
+ this.lexer.blockTokens(currentText, tokens, true);
+ this.lexer.state.top = top;
+
+ // if there is no continuation then we are done
+ if (lines.length === 0) {
+ break;
+ }
+
+ const lastToken = tokens[tokens.length - 1];
+
+ if (lastToken?.type === 'code') {
+ // blockquote continuation cannot be preceded by a code block
+ break;
+ } else if (lastToken?.type === 'blockquote') {
+ // include continuation in nested blockquote
+ const oldToken = lastToken as Tokens.Blockquote;
+ const newText = oldToken.raw + '\n' + lines.join('\n');
+ const newToken = this.blockquote(newText)!;
+ tokens[tokens.length - 1] = newToken;
+
+ raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw;
+ text = text.substring(0, text.length - oldToken.text.length) + newToken.text;
+ break;
+ } else if (lastToken?.type === 'list') {
+ // include continuation in nested list
+ const oldToken = lastToken as Tokens.List;
+ const newText = oldToken.raw + '\n' + lines.join('\n');
+ const newToken = this.list(newText)!;
+ tokens[tokens.length - 1] = newToken;
+
+ raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw;
+ text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw;
+ lines = newText.substring(tokens[tokens.length - 1].raw.length).split('\n');
+ continue;
+ }
+ }
+
  return {
  type: 'blockquote',
- raw: cap[0],
+ raw,
  tokens,
  text
  };

diff --git a/test/specs/commonmark/commonmark.0.31.json b/test/specs/commonmark/commonmark.0.31.json
@@ -1887,17 +1887,15 @@
  "example": 236,
  "start_line": 3838,
  "end_line": 3848,
- "section": "Block quotes",
- "shouldFail": true
+ "section": "Block quotes"
  },
  {
  "markdown": "> ```\nfoo\n```\n",
  "html": "<blockquote>\n<pre><code></code></pre>\n</blockquote>\n<p>foo</p>\n<pre><code></code></pre>\n",
  "example": 237,
  "start_line": 3851,
  "end_line": 3861,
- "section": "Block quotes",
- "shouldFail": true
+ "section": "Block quotes"
  },
  {
  "markdown": "> foo\n - bar\n",

diff --git a/test/specs/gfm/commonmark.0.31.json b/test/specs/gfm/commonmark.0.31.json
@@ -1887,17 +1887,15 @@
  "example": 236,
  "start_line": 3838,
  "end_line": 3848,
- "section": "Block quotes",
- "shouldFail": true
+ "section": "Block quotes"
  },
  {
  "markdown": "> ```\nfoo\n```\n",
  "html": "<blockquote>\n<pre><code></code></pre>\n</blockquote>\n<p>foo</p>\n<pre><code></code></pre>\n",
  "example": 237,
  "start_line": 3851,
  "end_line": 3861,
- "section": "Block quotes",
- "shouldFail": true
+ "section": "Block quotes"
  },
  {
  "markdown": "> foo\n - bar\n",

diff --git a/test/unit/marked.test.js b/test/unit/marked.test.js
@@ -18,7 +18,7 @@ describe('marked unit', () => {
 
  assert.strictEqual(tokens[0].type, 'paragraph');
  assert.strictEqual(tokens[2].tokens[0].type, 'paragraph');
- assert.strictEqual(tokens[3].items[0].tokens[0].type, 'text');
+ assert.strictEqual(tokens[4].items[0].tokens[0].type, 'text');
  });
  });
 
@@ -910,6 +910,7 @@ br
  ['text', 'paragraph'],
  ['space', ''],
  ['hr', '---'],
+ ['space', ''],
  ['heading', '# heading'],
  ['text', 'heading'],
  ['code', '```code```'],
@@ -924,6 +925,7 @@ br
  ['blockquote', '> blockquote'],
  ['paragraph', 'blockquote'],
  ['text', 'blockquote'],
+ ['space', ''],
  ['list', '- list'],
  ['list_item', '- list'],
  ['text', 'list'],