You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Have you considered adding a C++ wrapper on top of the C implementation?
md4cpp.h
#pragma once
#include<string_view>
#include <vector>

namespace Md
{
    // Fundamental scalar aliases used throughout the interface.
    using Char = char;
    using Size = unsigned;
    using Offset = unsigned;
    using ErrorCode = int;

    // Kind of a text fragment delivered to IParser::Print().
    //
    // The implementation converts the C parser's text type to this enum by
    // value, so the numeric values of the enumerators must not change.
    enum class TextType
    {
        // Normal text.
        Normal = 0,

        // NULL character. CommonMark requires replacing NULL character with
        // the replacement char U+FFFD, so this allows caller to do that easily.
        NullChar,

        // Line breaks.
        // Note these are not sent from blocks with verbatim output
        // (MD_BLOCK_CODE or MD_BLOCK_HTML). In such cases, '\n' is part of
        // the text itself.
        Br,     // <br> (hard break)
        SoftBr, // '\n' in source text where it is not semantically meaningful (soft break)

        // Entity.
        // (a) Named entity, e.g. &nbsp;
        //     (Note MD4C does not have a list of known entities.
        //     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
        //     treated as a named entity.)
        // (b) Numerical entity, e.g. &#1234;
        // (c) Hexadecimal entity, e.g. &#x12AB;
        //
        // As MD4C is mostly encoding agnostic, application gets the verbatim
        // entity text into the text callback (IParser::Print()).
        Entity,

        // Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
        // If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
        // '\n' for new lines. Br and SoftBr are not sent for this kind of text.
        Code,

        // Original, misspelled name of Code. Kept as an alias with the same
        // value so that code written against the old name keeps compiling.
        Core = Code,

        // Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
        // an inline raw HTML), then Br and SoftBr are not used.
        // The text contains verbatim '\n' for the new lines.
        Html,

        // Text is inside an equation. This is processed the same way as
        // inlined code spans (`code`).
        LatexMath
    };

    // One fragment of text together with its kind.
    // `symbols` is a non-owning view: it does not keep the characters alive.
    struct Word
    {
        std::string_view symbols;
        TextType type;
    };

    // A sequence of fragments, used for attribute values such as link targets.
    using Text = std::vector<Word>;

    /* Alignment enumeration. */
    enum class Align
    {
        Default = 0, /* When unspecified. */
        Left,
        Center,
        Right
    };

    // Parser option bits. Combine with '|' and pass as `dialect` to
    // IParser::Parse().
    namespace Flags
    {
        inline constexpr unsigned CollapseWhitespace       = 0x0001; /* In normal text, collapse non-trivial whitespace into single ' ' */
        inline constexpr unsigned PermissiveAtxHeaders     = 0x0002; /* Do not require space in ATX headers ( ###header ) */
        inline constexpr unsigned PermissiveUrlAutolinks   = 0x0004; /* Recognize URLs as autolinks even without '<', '>' */
        inline constexpr unsigned PermissiveEmailAutolinks = 0x0008; /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
        inline constexpr unsigned NoIndentedCodeblocks     = 0x0010; /* Disable indented code blocks. (Only fenced code works.) */
        inline constexpr unsigned NoHtmlBlocks             = 0x0020; /* Disable raw HTML blocks. */
        inline constexpr unsigned NoHtmlSpans              = 0x0040; /* Disable raw HTML (inline). */
        inline constexpr unsigned Tables                   = 0x0100; /* Enable tables extension. */
        inline constexpr unsigned StrikeThrough            = 0x0200; /* Enable strikethrough extension. */
        inline constexpr unsigned PermissiveWwwAutolinks   = 0x0400; /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
        inline constexpr unsigned TaskLists                = 0x0800; /* Enable task list extension. */
        inline constexpr unsigned LatexMathSpans           = 0x1000; /* Enable $ and $$ containing LaTeX equations. */
        inline constexpr unsigned WikiLinks                = 0x2000; /* Enable wiki links extension. */
        inline constexpr unsigned Underline                = 0x4000; /* Enable underline extension (and disables '_' for normal emphasis). */
        inline constexpr unsigned HardSoftBreaks           = 0x8000; /* Force all soft breaks to act as hard breaks. */

        inline constexpr unsigned PermissiveAutolinks =
            (Flags::PermissiveEmailAutolinks | Flags::PermissiveUrlAutolinks | Flags::PermissiveWwwAutolinks);
        inline constexpr unsigned NoHtml = (Flags::NoHtmlBlocks | Flags::NoHtmlSpans);
    }

    /* Convenient sets of flags corresponding to well-known Markdown dialects.
     *
     * Note we may only support subset of features of the referred dialect.
     * The constant just enables those extensions which bring us as close as
     * possible given what features we implement.
     *
     * ABI compatibility note: Meaning of these can change in time as new
     * extensions, bringing the dialect closer to the original, are implemented.
     */
    namespace Dialects
    {
        inline constexpr unsigned CommonMark = 0;
        inline constexpr unsigned GitHub =
            (Flags::PermissiveAutolinks | Flags::Tables | Flags::StrikeThrough | Flags::TaskLists);
    }

    // Abstract Markdown parser front end.
    //
    // Derive from this class and implement the protected virtual functions;
    // Parse() then reports the structure of the document by calling them.
    // Every Enter*/Leave* function and Print() returns a code that is handed
    // back unchanged to the underlying C parser (MD4C documents a non-zero
    // value as a request to abort parsing).
    class IParser
    {
    public:
        virtual ~IParser();

        // Parses `text` and drives the virtual functions below.
        // `dialect` is a combination of Flags:: bits (see Dialects for presets).
        // Returns the code produced by the underlying md_parse() call.
        ErrorCode Parse(std::string_view text, unsigned dialect = Dialects::CommonMark);

    protected:
        // Receives one fragment of document text.
        virtual int Print(Word) = 0;

        // Receives diagnostic messages emitted by the underlying parser.
        virtual void DebugLog(std::string_view) = 0;

        ////////////////////////////////////////////////////
        // Blocks
        ////////////////////////////////////////////////////

        // The whole document.
        virtual ErrorCode EnterBlock_Doc() = 0;
        virtual ErrorCode LeaveBlock_Doc() = 0;

        // <blockquote>...</blockquote>
        virtual ErrorCode EnterBlock_Quote() = 0;
        virtual ErrorCode LeaveBlock_Quote() = 0;

        // <ul>...</ul>
        virtual ErrorCode EnterBlock_Ul(bool isTight, Char mark) = 0;
        virtual ErrorCode LeaveBlock_Ul(bool isTight, Char mark) = 0;

        // <ol>...</ol>
        virtual ErrorCode EnterBlock_Ol(unsigned start, bool isTight, Char markDelimiter) = 0;
        virtual ErrorCode LeaveBlock_Ol(unsigned start, bool isTight, Char markDelimiter) = 0;

        // <li>...</li>
        virtual ErrorCode EnterBlock_Li(bool isTask, Char taskMark, Offset taskMarkOffset) = 0;
        virtual ErrorCode LeaveBlock_Li(bool isTask, Char taskMark, Offset taskMarkOffset) = 0;

        // <hr>
        virtual ErrorCode EnterBlock_Hr() = 0;
        virtual ErrorCode LeaveBlock_Hr() = 0;

        // <h1>...</h1> (for levels up to 6)
        virtual ErrorCode EnterBlock_H(unsigned level) = 0;
        virtual ErrorCode LeaveBlock_H(unsigned level) = 0;

        // <pre><code>...</code></pre>
        virtual ErrorCode EnterBlock_Code(const Text& info, const Text& lang, Char fenceChar) = 0;
        virtual ErrorCode LeaveBlock_Code(const Text& info, const Text& lang, Char fenceChar) = 0;

        // Raw HTML block. This itself does not correspond to any particular HTML
        // tag. The contents of it _is_ raw HTML source intended to be put
        // in verbatim form to the HTML output.
        virtual ErrorCode EnterBlock_Html() = 0;
        virtual ErrorCode LeaveBlock_Html() = 0;

        // <p>...</p>
        virtual ErrorCode EnterBlock_P() = 0;
        virtual ErrorCode LeaveBlock_P() = 0;

        // <table>...</table> and its contents.
        virtual ErrorCode EnterBlock_Table(unsigned colCount, unsigned headRowCount, unsigned bodyRowCount) = 0;
        virtual ErrorCode LeaveBlock_Table(unsigned colCount, unsigned headRowCount, unsigned bodyRowCount) = 0;
        virtual ErrorCode EnterBlock_TableHeader() = 0;
        virtual ErrorCode LeaveBlock_TableHeader() = 0;
        virtual ErrorCode EnterBlock_TableBody() = 0;
        virtual ErrorCode LeaveBlock_TableBody() = 0;
        virtual ErrorCode EnterBlock_TableRow() = 0;
        virtual ErrorCode LeaveBlock_TableRow() = 0;
        virtual ErrorCode EnterBlock_TableHeaderData(Align align) = 0;
        virtual ErrorCode LeaveBlock_TableHeaderData(Align align) = 0;
        virtual ErrorCode EnterBlock_TableBodyData(Align align) = 0;
        virtual ErrorCode LeaveBlock_TableBodyData(Align align) = 0;

        ////////////////////////////////////////////////////
        // Spans
        ////////////////////////////////////////////////////

        /* <em>...</em> */
        virtual ErrorCode EnterSpan_Em() = 0;
        virtual ErrorCode LeaveSpan_Em() = 0;

        /* <strong>...</strong> */
        virtual ErrorCode EnterSpan_Strong() = 0;
        virtual ErrorCode LeaveSpan_Strong() = 0;

        // <a href="xxx">...</a>
        virtual ErrorCode EnterSpan_A(const Text& href, const Text& title, bool isAutoLink) = 0;
        virtual ErrorCode LeaveSpan_A(const Text& href, const Text& title, bool isAutoLink) = 0;

        // <img src="xxx">
        // Note: Image text can contain nested spans and even nested images.
        // If rendered into ALT attribute of HTML <IMG> tag, it is the
        // responsibility of the parser to deal with it.
        virtual ErrorCode EnterSpan_Image(const Text& src, const Text& title) = 0;
        virtual ErrorCode LeaveSpan_Image(const Text& src, const Text& title) = 0;

        /* <code>...</code> */
        virtual ErrorCode EnterSpan_Code() = 0;
        virtual ErrorCode LeaveSpan_Code() = 0;

        // <del>...</del>
        virtual ErrorCode EnterSpan_Del() = 0;
        virtual ErrorCode LeaveSpan_Del() = 0;

        // For recognizing inline ($) and display ($$) equations
        virtual ErrorCode EnterSpan_LatexMath() = 0;
        virtual ErrorCode LeaveSpan_LatexMath() = 0;
        virtual ErrorCode EnterSpan_LatexMathDisplay() = 0;
        virtual ErrorCode LeaveSpan_LatexMathDisplay() = 0;

        /* Wiki links
         * Note: Recognized only when Flags::WikiLinks is enabled.
         */
        virtual ErrorCode EnterSpan_WikiLink(const Text& target) = 0;
        virtual ErrorCode LeaveSpan_WikiLink(const Text& target) = 0;

        // <u>...</u>
        virtual ErrorCode EnterSpan_U() = 0;
        virtual ErrorCode LeaveSpan_U() = 0;

        // The callback bridge in the implementation file calls the protected
        // functions above on behalf of the C parser.
        friend class ParserCppWrapperImpl_;
    };
} // Md
md4cpp.cpp
#include"md4cpp.h"
#include"md4c.h"namespaceMd
{
classParserCppWrapperImpl_
{
public:staticintEnterBlock(MD_BLOCKTYPE type, void* detail, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
switch (type)
{
case MD_BLOCK_DOC:
{
return parser.EnterBlock_Doc();
}
break;
case MD_BLOCK_QUOTE:
{
return parser.EnterBlock_Quote();
}
break;
case MD_BLOCK_UL:
{
MD_BLOCK_UL_DETAIL& d = *static_cast<MD_BLOCK_UL_DETAIL*>(detail);
return parser.EnterBlock_Ul(d.is_tight != 0, d.mark);
}
break;
case MD_BLOCK_OL:
{
MD_BLOCK_OL_DETAIL& d = *static_cast<MD_BLOCK_OL_DETAIL*>(detail);
return parser.EnterBlock_Ol(d.start, d.is_tight != 0, d.mark_delimiter);
}
break;
case MD_BLOCK_LI:
{
MD_BLOCK_LI_DETAIL& d = *static_cast<MD_BLOCK_LI_DETAIL*>(detail);
return parser.EnterBlock_Li(d.is_task != 0, d.task_mark, d.task_mark_offset);
}
break;
case MD_BLOCK_HR:
{
return parser.EnterBlock_Hr();
}
break;
case MD_BLOCK_H:
{
MD_BLOCK_H_DETAIL& d = *static_cast<MD_BLOCK_H_DETAIL*>(detail);
return parser.EnterBlock_H(d.level);
}
break;
case MD_BLOCK_CODE:
{
MD_BLOCK_CODE_DETAIL& d = *static_cast<MD_BLOCK_CODE_DETAIL*>(detail);
return parser.EnterBlock_Code(ToText(d.info), ToText(d.lang), d.fence_char);
}
break;
case MD_BLOCK_HTML:
{
return parser.EnterBlock_Html();
}
break;
case MD_BLOCK_P:
{
return parser.EnterBlock_P();
}
break;
case MD_BLOCK_TABLE:
{
MD_BLOCK_TABLE_DETAIL& d = *static_cast<MD_BLOCK_TABLE_DETAIL*>(detail);
return parser.EnterBlock_Table(d.col_count, d.head_row_count, d.body_row_count);
}
break;
case MD_BLOCK_THEAD:
{
return parser.EnterBlock_TableHeader();
}
break;
case MD_BLOCK_TBODY:
{
return parser.EnterBlock_TableBody();
}
break;
case MD_BLOCK_TR:
{
return parser.EnterBlock_TableRow();
}
break;
case MD_BLOCK_TH:
{
MD_BLOCK_TD_DETAIL& d = *static_cast<MD_BLOCK_TD_DETAIL*>(detail);
return parser.EnterBlock_TableHeaderData(Align(d.align));
}
break;
case MD_BLOCK_TD:
{
MD_BLOCK_TD_DETAIL& d = *static_cast<MD_BLOCK_TD_DETAIL*>(detail);
return parser.EnterBlock_TableBodyData(Align(d.align));
}
break;
default:
return511111115;
}
return511111115;
};
staticintLeaveBlock(MD_BLOCKTYPE type, void* detail, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
switch (type)
{
case MD_BLOCK_DOC:
{
return parser.LeaveBlock_Doc();
}
break;
case MD_BLOCK_QUOTE:
{
return parser.LeaveBlock_Quote();
}
break;
case MD_BLOCK_UL:
{
MD_BLOCK_UL_DETAIL& d = *static_cast<MD_BLOCK_UL_DETAIL*>(detail);
return parser.LeaveBlock_Ul(d.is_tight != 0, d.mark);
}
break;
case MD_BLOCK_OL:
{
MD_BLOCK_OL_DETAIL& d = *static_cast<MD_BLOCK_OL_DETAIL*>(detail);
return parser.LeaveBlock_Ol(d.start, d.is_tight != 0, d.mark_delimiter);
}
break;
case MD_BLOCK_LI:
{
MD_BLOCK_LI_DETAIL& d = *static_cast<MD_BLOCK_LI_DETAIL*>(detail);
return parser.LeaveBlock_Li(d.is_task != 0, d.task_mark, d.task_mark_offset);
}
break;
case MD_BLOCK_HR:
{
return parser.LeaveBlock_Hr();
}
break;
case MD_BLOCK_H:
{
MD_BLOCK_H_DETAIL& d = *static_cast<MD_BLOCK_H_DETAIL*>(detail);
return parser.LeaveBlock_H(d.level);
}
break;
case MD_BLOCK_CODE:
{
MD_BLOCK_CODE_DETAIL& d = *static_cast<MD_BLOCK_CODE_DETAIL*>(detail);
auto err = parser.LeaveBlock_Code(ToText(d.info), ToText(d.lang), d.fence_char);
return err;
}
break;
case MD_BLOCK_HTML:
{
return parser.LeaveBlock_Html();
}
break;
case MD_BLOCK_P:
{
return parser.LeaveBlock_P();
}
break;
case MD_BLOCK_TABLE:
{
MD_BLOCK_TABLE_DETAIL& d = *static_cast<MD_BLOCK_TABLE_DETAIL*>(detail);
return parser.LeaveBlock_Table(d.col_count, d.head_row_count, d.body_row_count);
}
break;
case MD_BLOCK_THEAD:
{
return parser.LeaveBlock_TableHeader();
}
break;
case MD_BLOCK_TBODY:
{
return parser.LeaveBlock_TableBody();
}
break;
case MD_BLOCK_TR:
{
return parser.LeaveBlock_TableRow();
}
break;
case MD_BLOCK_TH:
{
MD_BLOCK_TD_DETAIL& d = *static_cast<MD_BLOCK_TD_DETAIL*>(detail);
return parser.LeaveBlock_TableHeaderData(Align(d.align));
}
break;
case MD_BLOCK_TD:
{
MD_BLOCK_TD_DETAIL& d = *static_cast<MD_BLOCK_TD_DETAIL*>(detail);
return parser.LeaveBlock_TableBodyData(Align(d.align));
}
break;
default:
return611111116;
}
return611111116;
};
staticintEnterSpan(MD_SPANTYPE type, void* detail, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
switch (type)
{
case MD_SPAN_EM:
{
return parser.EnterSpan_Em();
}
break;
case MD_SPAN_STRONG:
{
return parser.EnterSpan_Strong();
}
break;
case MD_SPAN_A:
{
MD_SPAN_A_DETAIL& d = *static_cast<MD_SPAN_A_DETAIL*>(detail);
return parser.EnterSpan_A(ToText(d.href), ToText(d.title), d.is_autolink != 0);
}
break;
case MD_SPAN_IMG:
{
MD_SPAN_IMG_DETAIL& d = *static_cast<MD_SPAN_IMG_DETAIL*>(detail);
return parser.EnterSpan_Image(ToText(d.src), ToText(d.title));
}
break;
case MD_SPAN_CODE:
{
return parser.EnterSpan_Code();
}
break;
case MD_SPAN_DEL:
{
return parser.EnterSpan_Del();
}
break;
case MD_SPAN_LATEXMATH:
{
return parser.EnterSpan_LatexMath();
}
break;
case MD_SPAN_LATEXMATH_DISPLAY:
{
return parser.EnterSpan_LatexMathDisplay();
}
break;
case MD_SPAN_WIKILINK:
{
MD_SPAN_WIKILINK_DETAIL& d = *static_cast<MD_SPAN_WIKILINK_DETAIL*>(detail);
return parser.EnterSpan_WikiLink(ToText(d.target));
}
break;
case MD_SPAN_U:
{
return parser.EnterSpan_U();
}
break;
default:
return711111117;
}
return711111117;
};
staticintLeaveSpan(MD_SPANTYPE type, void* detail, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
switch (type)
{
case MD_SPAN_EM:
{
return parser.LeaveSpan_Em();
}
break;
case MD_SPAN_STRONG:
{
return parser.LeaveSpan_Strong();
}
break;
case MD_SPAN_A:
{
MD_SPAN_A_DETAIL& d = *static_cast<MD_SPAN_A_DETAIL*>(detail);
return parser.LeaveSpan_A(ToText(d.href), ToText(d.title), d.is_autolink != 0);
}
break;
case MD_SPAN_IMG:
{
MD_SPAN_IMG_DETAIL& d = *static_cast<MD_SPAN_IMG_DETAIL*>(detail);
return parser.LeaveSpan_Image(ToText(d.src), ToText(d.title));
}
break;
case MD_SPAN_CODE:
{
return parser.LeaveSpan_Code();
}
break;
case MD_SPAN_DEL:
{
return parser.LeaveSpan_Del();
}
break;
case MD_SPAN_LATEXMATH:
{
return parser.LeaveSpan_LatexMath();
}
break;
case MD_SPAN_LATEXMATH_DISPLAY:
{
return parser.LeaveSpan_LatexMathDisplay();
}
break;
case MD_SPAN_WIKILINK:
{
MD_SPAN_WIKILINK_DETAIL& d = *static_cast<MD_SPAN_WIKILINK_DETAIL*>(detail);
return parser.LeaveSpan_WikiLink(ToText(d.target));
}
break;
case MD_SPAN_U:
{
return parser.LeaveSpan_U();
}
break;
default:
return811111118;
}
return811111118;
};
staticintPrint(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
Word word = ToWord(type, text, size);
return parser.Print(std::move(word));
};
staticvoidDebugLog(constchar* msg, void* userdata)
{
auto& parser = *static_cast<IParser*>(userdata);
parser.DebugLog(msg);
};
private:static Word ToWord(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size)
{
Word word;
word.type = TextType(type);
word.symbols = std::string_view(text, size);
return word;
}
static Text ToText(const MD_ATTRIBUTE& attr)
{
Text text;
text.reserve(4);
size_t subText = 0;
while(attr.substr_offsets[subText] < attr.size)
{
Word word = ToWord(attr.substr_types[subText],
attr.text + attr.substr_offsets[subText],
attr.substr_offsets[subText + 1] - attr.substr_offsets[subText]);
text.push_back(std::move(word));
subText++;
}
return text;
}
};
IParser::~IParser() = default;
ErrorCode IParser::Parse(std::string_view text, unsigned dialect)
{
MD_PARSER parser;
parser.abi_version = 0;
parser.flags = dialect;
parser.enter_block = &ParserCppWrapperImpl_::EnterBlock;
parser.leave_block = &ParserCppWrapperImpl_::LeaveBlock;
parser.enter_span = &ParserCppWrapperImpl_::EnterSpan;
parser.leave_span = &ParserCppWrapperImpl_::LeaveSpan;
parser.text = &ParserCppWrapperImpl_::Print;
parser.debug_log = &ParserCppWrapperImpl_::DebugLog;
parser.syntax = nullptr;
returnmd_parse(text.data(), text.size(), &parser, this);
}
} // Md
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
Have you considered adding a C++ wrapper on top of the C implementation?
md4cpp.h
md4cpp.cpp
Beta Was this translation helpful? Give feedback.
All reactions