aboutsummaryrefslogtreecommitdiff
path: root/src/mdx/mdx_parser.zig
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-02-27 23:20:10 -0800
committerGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-02-27 23:20:10 -0800
commit50560e169ca39c0b4ec163cb32897baf7620aa69 (patch)
tree83eb721bfd4a0318874c1f69d254a4fd9446512b /src/mdx/mdx_parser.zig
parent36c249e9c1fc6e0000d23ae0055eed54a5437c74 (diff)
downloadbun-50560e169ca39c0b4ec163cb32897baf7620aa69.tar.gz
bun-50560e169ca39c0b4ec163cb32897baf7620aa69.tar.zst
bun-50560e169ca39c0b4ec163cb32897baf7620aa69.zip
WASM
Diffstat (limited to 'src/mdx/mdx_parser.zig')
-rw-r--r--src/mdx/mdx_parser.zig1836
1 files changed, 1836 insertions, 0 deletions
diff --git a/src/mdx/mdx_parser.zig b/src/mdx/mdx_parser.zig
new file mode 100644
index 000000000..cecc8e57f
--- /dev/null
+++ b/src/mdx/mdx_parser.zig
@@ -0,0 +1,1836 @@
+const std = @import("std");
+const logger = @import("../logger.zig");
+const mdx_lexer = @import("./mdx_lexer.zig");
+const Lexer = mdx_lexer.Lexer;
+const importRecord = @import("../import_record.zig");
+const js_ast = @import("../js_ast.zig");
+const JSParser = @import("../js_parser/js_parser.zig").MDXParser;
+const ParseStatementOptions = @import("../js_parser/js_parser.zig").ParseStatementOptions;
+
+const options = @import("../options.zig");
+
+const fs = @import("../fs.zig");
+const _global = @import("../global.zig");
+const string = _global.string;
+const Output = _global.Output;
+const Global = _global.Global;
+const Environment = _global.Environment;
+const strings = _global.strings;
+const MutableString = _global.MutableString;
+const stringZ = _global.stringZ;
+const default_allocator = _global.default_allocator;
+const C = _global.C;
+const expect = std.testing.expect;
+const ImportKind = importRecord.ImportKind;
+const BindingNodeIndex = js_ast.BindingNodeIndex;
+const Define = @import("../defines.zig").Define;
+const js_lexer = @import("../js_lexer.zig");
+const StmtNodeIndex = js_ast.StmtNodeIndex;
+const ExprNodeIndex = js_ast.ExprNodeIndex;
+const ExprNodeList = js_ast.ExprNodeList;
+const StmtNodeList = js_ast.StmtNodeList;
+const BindingNodeList = js_ast.BindingNodeList;
+const ParserOptions = @import("../js_parser/js_parser.zig").Parser.Options;
+const runVisitPassAndFinish = @import("../js_parser/js_parser.zig").Parser.runVisitPassAndFinish;
+const Ref = @import("../ast/base.zig").Ref;
+const assert = std.debug.assert;
+const BabyList = js_ast.BabyList;
+
+const LocRef = js_ast.LocRef;
+const S = js_ast.S;
+const B = js_ast.B;
+const G = js_ast.G;
+const T = mdx_lexer.T;
+const E = js_ast.E;
+const Stmt = js_ast.Stmt;
+const Expr = js_ast.Expr;
+const Binding = js_ast.Binding;
+const Symbol = js_ast.Symbol;
+const Level = js_ast.Op.Level;
+const Op = js_ast.Op;
+const Scope = js_ast.Scope;
+const Range = logger.Range;
+
+/// Bookkeeping for one open container (block quote or list item) while
+/// lines are being analyzed.
+pub const Container = struct {
+ /// Mark character: '>' for a block quote, '-', '+' or '*' for a bullet
+ /// item, '.' or ')' for an ordered item.
+ ch: u8 = 0,
+ is_loose: bool = false,
+ /// Set when a task mark (`[x]`, `[X]` or `[ ]`) was recognized for the item.
+ is_task: bool = false,
+ /// Number parsed from an ordered-list mark.
+ start: u32 = 0,
+ /// Indentation at which the mark itself was found.
+ mark_indent: u32 = 0,
+ /// Indentation a line needs in order to belong to this container's contents.
+ contents_indent: u32 = 0,
+ /// Index into the parser's block list; read when a list is flagged as loose.
+ block_index: u32 = 0,
+ /// Offset in the input of the character between '[' and ']' of the task mark.
+ task_mark_off: u32 = 0,
+};
+
+/// One block-level element recorded while lines are analyzed, together with
+/// the per-kind detail structures that describe it.
+pub const Block = struct {
+    tag: Tag = Tag.html,
+    flags: Block.Flags.Set = Block.Flags.Set{},
+    /// Copied from `Line.Analysis.data` of the line that opened the block.
+    data: u32 = 0,
+    /// Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
+    /// LI: Task mark offset in the input doc.
+    /// OL: Start item number.
+    line_count: u32 = 0,
+    /// Index of the block's first entry in the list handed to `lines` or
+    /// `verbatimLines`.
+    line_offset: u32 = 0,
+    /// Bare (untagged) union; presumably `tag` tells which member applies —
+    /// TODO confirm. `none` is `void`, whose only value is written `{}`.
+    detail: Block.Detail = Block.Detail{ .none = {} },
+
+    /// Returns the `line_count` entries of `lines_` starting at `line_offset`.
+    pub inline fn lines(this: Block, lines_: BabyList(Line)) []Line {
+        return lines_.ptr[this.line_offset .. this.line_offset + this.line_count];
+    }
+
+    /// Same as `lines`, for blocks whose lines live in the verbatim list.
+    pub inline fn verbatimLines(this: Block, lines_: BabyList(Line.Verbatim)) []Line.Verbatim {
+        return lines_.ptr[this.line_offset .. this.line_offset + this.line_count];
+    }
+
+    pub const Data = u32;
+
+    pub const Flags = enum(u3) {
+        container_opener = 0,
+        container_closer = 1,
+        loose_list = 2,
+        setext_header = 3,
+
+        pub const Set = std.enums.EnumSet(Block.Flags);
+    };
+
+    /// True when the block carries the container opener or closer flag.
+    pub inline fn isContainer(this: Block) bool {
+        return this.flags.contains(.container_opener) or this.flags.contains(.container_closer);
+    }
+
+    pub const Tag = enum {
+        /// <body>...</body>
+        doc,
+
+        /// <blockquote>...</blockquote>
+        quote,
+
+        /// <ul>...</ul>
+        /// Detail: Structure `UL`.
+        ul,
+
+        /// <ol>...</ol>
+        /// Detail: Structure `OL`.
+        ol,
+
+        /// <li>...</li>
+        /// Detail: Structure `LI`.
+        li,
+
+        /// <hr>
+        hr,
+
+        /// <h1>...</h1> (for levels up to 6)
+        /// Detail: `Header`.
+        h,
+
+        /// <pre><code>...</code></pre>
+        /// Note the text lines within code blocks are terminated with '\n'
+        /// instead of explicit MD_TEXT_BR.
+        code,
+
+        /// Raw HTML block. This itself does not correspond to any particular HTML
+        /// tag. The contents of it _is_ raw HTML source intended to be put
+        /// in verbatim form to the HTML output.
+        html,
+
+        /// <p>...</p>
+        p,
+
+        /// <table>...</table> and its contents.
+        /// Detail: Structure `Table` (for table),
+        ///         structure `TD` (for th and td)
+        /// Note all of these are used only if extension MD_FLAG_TABLES is enabled.
+        table,
+        thead,
+        tbody,
+        tr,
+        th,
+        td,
+    };
+
+    pub const UL = struct {
+        tight: bool = false,
+        mark: u8 = '*',
+    };
+
+    pub const OL = struct {
+        start: u32 = 0,
+        tight: bool = false,
+        mark: u8 = '*',
+    };
+
+    pub const LI = struct {
+        /// Can be non-zero only with MD_FLAG_TASKLISTS
+        task: bool = false,
+        /// If `task`, then one of 'x', 'X' or ' '. Undefined otherwise.
+        task_mark: u8 = 'x',
+        /// If `task`, then offset in the input of the char between '[' and ']'.
+        task_mark_off: u32 = 0,
+    };
+
+    pub const Header = u4;
+
+    pub const Code = struct {
+        info: Attribute = .{},
+        lang: Attribute = .{},
+        /// Character used for fenced code block; or zero for indented code block.
+        fence: u8 = '`',
+    };
+
+    pub const Table = struct {
+        /// Count of columns in the table.
+        column_count: u32 = 0,
+        /// Count of rows in the table header (currently always 1)
+        head_row_count: u32 = 1,
+        /// Count of rows in the table body
+        body_row_count: u32 = 0,
+    };
+
+    pub const Detail = union {
+        none: void,
+        ul: UL,
+        ol: OL,
+        li: LI,
+    };
+
+    pub const TD = struct {
+        alignment: Align = Align.default,
+    };
+};
+/// Namespace for inline (span-level) element kinds and their detail structures.
+pub const Span = struct {
+ pub const Tag = enum {
+ /// <em>...</em>
+ em,
+
+ /// <strong>...</strong>
+ strong,
+
+ /// <a href="xxx">...</a>
+ /// Detail: Structure `Link`.
+ a,
+
+ /// <img src="xxx">
+ /// Detail: Structure `Image`.
+ /// Note: Image text can contain nested spans and even nested images.
+ /// If rendered into the ALT attribute of an HTML <IMG> tag, it is the
+ /// responsibility of the parser to deal with it.
+ img,
+
+ /// <code>...</code>
+ code,
+
+ /// <del>...</del>
+ /// Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
+ del,
+
+ /// For recognizing inline ($) and display ($$) equations
+ /// Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
+ latexmath,
+ latexmath_display,
+
+ /// Wiki links
+ /// Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
+ wikilink,
+
+ /// <u>...</u>
+ /// Note: Recognized only when MD_FLAG_UNDERLINE is enabled.
+ u,
+ };
+
+ /// Detail for `Tag.a`: link destination and optional title.
+ pub const Link = struct {
+ src: Attribute = .{},
+ title: Attribute = .{},
+ };
+
+ /// Images share the layout of `Link`.
+ pub const Image = Link;
+
+ /// Detail for `Tag.wikilink`.
+ pub const Wikilink = struct {
+ target: Attribute = .{},
+ };
+};
+
+/// Kind of a piece of text; also used as the tag of each `Substring` of an
+/// `Attribute`.
+pub const Text = enum {
+ /// Normal text.
+ normal,
+ /// NULL character. CommonMark requires replacing NULL character with
+ /// the replacement char U+FFFD, so this allows caller to do that easily.
+ nullchar,
+ /// Line breaks.
+ /// Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
+ /// or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself.
+ /// <br> (hard break)
+ br,
+ /// '\n' in source text where it is not semantically meaningful (soft break)
+ softbr,
+ /// Entity.
+ /// (a) Named entity, e.g. &nbsp;
+ /// (Note MD4C does not have a list of known entities.
+ /// Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
+ /// treated as a named entity.)
+ /// (b) Numerical entity, e.g. &#1234;
+ /// (c) Hexadecimal entity, e.g. &#x12AB;
+ ///
+ /// As MD4C is mostly encoding agnostic, application gets the verbatim
+ /// entity text into the MD_PARSER::text_callback().
+ entity,
+ /// Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
+ /// If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
+ /// '\n' for new lines. br and softbr are not sent for this
+ /// kind of text.
+ code,
+ /// Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
+ /// an inline raw HTML), then br and softbr are not used.
+ /// The text contains verbatim '\n' for the new lines.
+ html,
+ /// Text is inside an equation. This is processed the same way as inlined code
+ /// spans (`code`).
+ latexmath,
+};
+/// Alignment value; used as the type of `Block.TD.alignment`.
+pub const Align = enum(u3) {
+ default = 0,
+ left = 1,
+ center = 2,
+ right = 3,
+};
+
+/// String attribute.
+///
+/// This wraps strings which are outside of a normal text flow and which are
+/// propagated within various detailed structures, but which still may contain
+/// string portions of different types like e.g. entities.
+///
+/// So, for example, let's consider this image:
+///
+/// ![image alt text](http://example.org/image.png 'foo &quot; bar')
+///
+/// The image alt text is propagated as a normal text via the MD_PARSER::text()
+/// callback. However, the image title ('foo &quot; bar') is propagated as
+/// MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
+///
+/// Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
+/// -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
+/// -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
+/// -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
+/// -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
+///
+/// Note that these invariants are always guaranteed:
+/// -- substr_offsets[0] == 0
+/// -- substr_offsets[LAST+1] == size
+/// -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
+/// substrings can appear. This could change only if the specification
+/// changes.
+///
+/// In this port the offsets and types are stored as `Substring` entries
+/// (`offset`, `tag`) in `substring`.
+pub const Attribute = struct {
+ text: []const u8 = "",
+ substring: Substring.List = .{},
+};
+/// One typed portion of an `Attribute`: where it starts and what kind of
+/// text it is.
+pub const Substring = struct {
+ /// Start of the portion, relative to the attribute text.
+ offset: u32,
+ tag: Text,
+
+ pub const List = std.MultiArrayList(Substring);
+ // NOTE(review): `ObjectPool` is not among the imports visible at the top
+ // of this file — confirm where it is declared and what arguments it takes.
+ pub const ListPool = ObjectPool(List);
+};
+
+/// A potential inline mark found in the text, plus the flag bits and the
+/// opener chains used while marks are resolved.
+pub const Mark = struct {
+    position: Ref = Ref.None,
+    /// Index of the previous mark in a chain; `maxInt(u32)` when there is none.
+    prev: u32 = std.math.maxInt(u32),
+    /// Index of the next mark in a chain; `maxInt(u32)` when there is none.
+    next: u32 = std.math.maxInt(u32),
+    ch: u8 = 0,
+    /// Bitwise OR of the flag constants declared below.
+    flags: u16 = 0,
+
+    /// Maybe closer.
+    pub const potential_closer = 0x02;
+    /// Maybe opener.
+    pub const potential_opener = 0x01;
+    /// Definitely opener.
+    pub const opener = 0x04;
+    /// Definitely closer.
+    pub const closer = 0x08;
+    /// Resolved in any definite way.
+    pub const resolved = 0x10;
+
+    // The constants below deliberately reuse the value 0x20: each one is
+    // documented for a different mark character.
+
+    /// Helper for the "rule of 3".
+    pub const emph_intraword = 0x20;
+    pub const emph_mod3_0 = 0x40;
+    pub const emph_mod3_1 = 0x80;
+    pub const emph_mod3_2 = (0x40 | 0x80);
+    pub const emph_mod3_mask = (0x40 | 0x80);
+    /// Distinguisher for '<', '>'.
+    pub const autolink = 0x20;
+    /// For permissive autolinks.
+    pub const validpermissiveautolink = 0x20;
+    /// For '[' to rule out invalid link labels early.
+    pub const hasnestedbrackets = 0x20;
+
+    /// During analysis of inline marks, we need to manage some "mark chains"
+    /// of (yet unresolved) openers. This structure holds start/end of the chain.
+    /// The chain internals are then realized through `Mark.prev` and `Mark.next`.
+    pub const Chain = struct {
+        head: u32 = std.math.maxInt(u32),
+        tail: u32 = std.math.maxInt(u32),
+
+        /// Fixed set of chains, one per accessor below.
+        pub const List = struct {
+            /// Number of chains; must cover the highest index used by the accessors.
+            pub const chain_count = 13;
+
+            // Repetition keeps the initializer length tied to `chain_count`
+            // instead of a hand-counted element list.
+            data: [chain_count]Chain = [_]Chain{.{}} ** chain_count,
+
+            pub inline fn ptr_chain(this: *List) *Chain {
+                return &this.data[0];
+            }
+            pub inline fn tablecellboundaries(this: *List) *Chain {
+                return &this.data[1];
+            }
+            pub inline fn asterisk_openers_extraword_mod3_0(this: *List) *Chain {
+                return &this.data[2];
+            }
+            pub inline fn asterisk_openers_extraword_mod3_1(this: *List) *Chain {
+                return &this.data[3];
+            }
+            pub inline fn asterisk_openers_extraword_mod3_2(this: *List) *Chain {
+                return &this.data[4];
+            }
+            pub inline fn asterisk_openers_intraword_mod3_0(this: *List) *Chain {
+                return &this.data[5];
+            }
+            pub inline fn asterisk_openers_intraword_mod3_1(this: *List) *Chain {
+                return &this.data[6];
+            }
+            pub inline fn asterisk_openers_intraword_mod3_2(this: *List) *Chain {
+                return &this.data[7];
+            }
+            pub inline fn underscore_openers(this: *List) *Chain {
+                return &this.data[8];
+            }
+            pub inline fn tilde_openers_1(this: *List) *Chain {
+                return &this.data[9];
+            }
+            pub inline fn tilde_openers_2(this: *List) *Chain {
+                return &this.data[10];
+            }
+            pub inline fn bracket_openers(this: *List) *Chain {
+                return &this.data[11];
+            }
+            pub inline fn dollar_openers(this: *List) *Chain {
+                return &this.data[12];
+            }
+        };
+    };
+};
+
+/// A line of a block, stored as a half-open range of offsets into the source.
+pub const Line = struct {
+ beg: u32 = 0,
+ end: u32 = 0,
+
+ /// Classification assigned to a line by the line analysis.
+ pub const Tag = enum(u32) {
+ blank,
+ hr,
+ atx_header,
+ setext_header,
+ setext_underline,
+ indented_code,
+ fenced_code,
+ html,
+ text,
+ table,
+ table_underline,
+ };
+ /// Result of analyzing one line: its kind, content range, remaining
+ /// indentation and a kind-specific `data` value.
+ pub const Analysis = packed struct {
+ tag: Tag = Tag.blank,
+ beg: u32 = 0,
+ end: u32 = 0,
+ indent: u32 = 0,
+ data: u32 = 0,
+
+ /// All-default analysis, used as the shared "blank line" value.
+ pub const blank = Analysis{};
+ /// Compares the two values byte-for-byte through `strings.eqlLong`.
+ pub fn eql(a: Analysis, b: Analysis) bool {
+ return strings.eqlLong(std.mem.asBytes(&a), std.mem.asBytes(&b), false);
+ }
+ };
+
+ /// A line kept verbatim together with its indentation.
+ pub const Verbatim = struct {
+ line: Line = Line{},
+ indent: u32 = 0,
+ };
+};
+
+pub const MDParser = struct {
+ marks: BabyList(Mark) = .{},
+ chain: Mark.Chain.List = .{},
+ source: logger.Source,
+ flags: Flags.Set = Flags.commonmark,
+ allocator: std.mem.Allocator,
+ mdx: *MDX,
+ /// Lookup table indexed by byte value (1 = byte may take part in an inline
+ /// mark). It needs one entry for each of the 256 possible byte values;
+ /// with 255 entries byte 0xFF would index out of bounds.
+ /// Filled in by `buildCharMap`.
+ mark_char_map: [256]u1 = undefined,
+ doc_ends_with_newline: bool = false,
+ /// Length of `source.contents`, truncated to u32 in `init`.
+ size: u32 = 0,
+
+ lines: BabyList(Line) = .{},
+ verbatim_lines: BabyList(Line.Verbatim) = .{},
+
+ /// Currently open containers (block quotes and list items), outermost first.
+ containers: BabyList(Container) = .{},
+ blocks: BabyList(Block) = .{},
+ current_block: ?*Block = null,
+ current_block_index: u32 = 0,
+
+ code_fence_length: u32 = 0,
+ /// Indentation from which a line counts as indented code.
+ code_indent_offset: u32 = std.math.maxInt(u32),
+ last_line_has_list_loosening_effect: bool = false,
+ last_list_item_starts_with_two_blank_lines: bool = false,
+
+ /// Parser options. Individual options are enum members; combinations are
+ /// expressed as `Flags.Set` values such as the presets declared at the end.
+ pub const Flags = enum {
+     /// In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' '
+     collapse_whitespace,
+     /// Do not require space in ATX headers ( ###header )
+     permissive_atxheaders,
+     /// Recognize URLs as autolinks even without '<', '>'
+     permissive_url_autolinks,
+     /// Recognize e-mails as autolinks even without '<', '>' and 'mailto:'
+     permissive_email_autolinks,
+     /// Disable indented code blocks. (Only fenced code works.)
+     noindented_codeblocks,
+     /// Disable raw HTML blocks.
+     no_html_blocks,
+     /// Disable raw HTML (inline).
+     no_html_spans,
+     /// Enable tables extension.
+     tables,
+     /// Enable strikethrough extension.
+     strikethrough,
+     /// Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.')
+     permissive_www_autolinks,
+     /// Enable task list extension.
+     tasklists,
+     /// Enable $ and $$ containing LaTeX equations.
+     latex_mathspans,
+     /// Enable wiki links extension.
+     wikilinks,
+     /// Enable underline extension (and disables '_' for normal emphasis).
+     underline,
+
+     pub const Set = std.enums.EnumSet(Flags);
+
+     /// E-mail and URL autolinks without angle brackets.
+     pub const permissive_autolinks = Set.init(.{ .permissive_email_autolinks = true, .permissive_url_autolinks = true });
+     /// NOTE(review): despite the name, this preset disables raw HTML blocks
+     /// and spans. The name is kept so existing references keep compiling.
+     pub const no_email = Set.init(.{ .no_html_blocks = true, .no_html_spans = true });
+     /// GitHub-flavoured preset. `Set.init` only accepts enum members, so the
+     /// members of `permissive_autolinks` are spelled out here.
+     pub const github = Set.init(.{
+         .tables = true,
+         .permissive_email_autolinks = true,
+         .permissive_url_autolinks = true,
+         .strikethrough = true,
+         .tasklists = true,
+     });
+     /// Plain CommonMark: no extension enabled.
+     pub const commonmark = Set{};
+ };
+
+ /// Rebuilds `mark_char_map` from `flags`: an entry is 1 when the byte can
+ /// take part in an inline mark and 0 otherwise.
+ fn buildCharMap(this: *MDParser) void {
+     // The table holds u1 entries, so reset it as a typed value rather than
+     // with a raw byte fill.
+     this.mark_char_map = std.mem.zeroes(@TypeOf(this.mark_char_map));
+
+     // Always-significant characters.
+     for ("\\*_`&;<>[!]") |ch| {
+         this.mark_char_map[ch] = 1;
+     }
+     this.mark_char_map[0] = 1;
+
+     // Whitespace: space, tab, CR, LF, form feed (0xC), vertical tab (0xB).
+     for (" \t\r\n\x0C\x0B") |ch| {
+         this.mark_char_map[ch] = 1;
+     }
+
+     if (this.flags.contains(.strikethrough)) {
+         this.mark_char_map['~'] = 1;
+     }
+
+     if (this.flags.contains(.latex_mathspans)) {
+         this.mark_char_map['$'] = 1;
+     }
+
+     if (this.flags.contains(.permissive_email_autolinks)) {
+         this.mark_char_map['@'] = 1;
+     }
+
+     if (this.flags.contains(.permissive_url_autolinks)) {
+         this.mark_char_map[':'] = 1;
+     }
+
+     if (this.flags.contains(.permissive_www_autolinks)) {
+         this.mark_char_map['.'] = 1;
+     }
+
+     // '|' separates table cells and the two halves of a wiki link; MD4C
+     // marks it for either extension.
+     if (this.flags.contains(.tables) or this.flags.contains(.wikilinks)) {
+         this.mark_char_map['|'] = 1;
+     }
+ }
+ /// Creates a parser over `source` with the given option set.
+ /// Builds the mark character table and records whether the document ends
+ /// with '\n'. `size` is the content length truncated to u32.
+ pub fn init(allocator: std.mem.Allocator, source: logger.Source, flags: Flags.Set, mdx: *MDX) MDParser {
+     var parser = MDParser{
+         .allocator = allocator,
+         .source = source,
+         .flags = flags,
+         .mdx = mdx,
+         .size = @truncate(u32, source.contents.len),
+     };
+     parser.buildCharMap();
+
+     const contents = source.contents;
+     // Guard the empty document before looking at its last byte.
+     parser.doc_ends_with_newline = contents.len > 0 and contents[contents.len - 1] == '\n';
+     return parser;
+ }
+
+ /// Appends a new, still empty block for `line` to `this.blocks`.
+ /// The block kind is derived from the line kind; line kinds that cannot
+ /// open a block are treated as unreachable. Code and HTML blocks start at
+ /// the current end of `verbatim_lines`, all others at the end of `lines`.
+ /// NOTE(review): `current_block` is not updated here — confirm whether a
+ /// caller is expected to do that.
+ fn startNewBlock(this: *MDParser, line: *const Line.Analysis) !void {
+     const block_tag: Block.Tag = switch (line.tag) {
+         .hr => .hr,
+         .atx_header, .setext_header => .h,
+         .fenced_code, .indented_code => .code,
+         .text => .p,
+         .html => .html,
+         else => unreachable,
+     };
+
+     const is_verbatim = switch (line.tag) {
+         .indented_code, .html, .fenced_code => true,
+         else => false,
+     };
+     const first_line = if (is_verbatim) this.verbatim_lines.len else this.lines.len;
+
+     try this.blocks.push(this.allocator, Block{
+         .tag = block_tag,
+         .data = line.data,
+         .line_count = 0,
+         .line_offset = first_line,
+     });
+ }
+
+ /// Returns the source byte at `index`.
+ inline fn charAt(this: *const MDParser, index: u32) u8 {
+     const bytes = this.source.contents;
+     return bytes[index];
+ }
+
+ /// True when the source byte at `index` is '\n' or '\r'.
+ inline fn isNewline(this: *const MDParser, index: u32) bool {
+     const ch = this.charAt(index);
+     return ch == '\n' or ch == '\r';
+ }
+
+ /// True when the source byte at `index` equals `first` or `second`.
+ inline fn isAnyOf2(this: *const MDParser, index: u32, comptime first: u8, comptime second: u8) bool {
+     const ch = this.charAt(index);
+     return isAnyOf2_(ch, first, second);
+ }
+
+ /// True when `char` equals `first` or `second`.
+ inline fn isAnyOf2_(char: u8, comptime first: u8, comptime second: u8) bool {
+     return char == first or char == second;
+ }
+
+ /// True when the source byte at `index` is one of `values`.
+ inline fn isAnyOf(this: *const MDParser, index: u32, comptime values: []const u8) bool {
+     const ch = this.charAt(index);
+     return isCharAnyOf(ch, values);
+ }
+
+ /// True when `char` is one of the comptime-known `values`.
+ /// The comparison chain is unrolled at compile time.
+ inline fn isCharAnyOf(char: u8, comptime values: []const u8) bool {
+     comptime var i: usize = 0;
+     inline while (i < values.len) : (i += 1) {
+         if (char == values[i]) return true;
+     }
+     return false;
+ }
+
+ /// True for a space or a horizontal tab.
+ inline fn isBlank(char: u8) bool {
+     return char == ' ' or char == '\t';
+ }
+
+ /// True for a space, horizontal tab, form feed (0xC) or vertical tab (0xB).
+ inline fn isWhitespace(char: u8) bool {
+     return switch (char) {
+         ' ', '\t', 0xC, 0xB => true,
+         else => false,
+     };
+ }
+
+ /// Measures the run of spaces and tabs starting at `beg`.
+ ///
+ /// `total_indent` is the indentation already consumed on the line; it is
+ /// needed because a tab advances to the next multiple of 4 columns.
+ /// `end.*` receives the offset of the first byte that is not a space or
+ /// tab (or `size`). Returns the number of columns the run adds.
+ pub fn getIndent(this: *MDParser, total_indent: u32, beg: u32, end: *u32) u32 {
+     var off = beg;
+     var indent = total_indent;
+     while (off < this.size) : (off += 1) {
+         switch (this.charAt(off)) {
+             // Round up to the next tab stop. The mask must be a typed
+             // value: `~` is not defined for comptime_int.
+             '\t' => indent = (indent + 4) & ~@as(u32, 3),
+             ' ' => indent += 1,
+             else => break,
+         }
+     }
+     end.* = off;
+     return indent - total_indent;
+ }
+
+ /// Checks whether a container mark starts at `beg`: a block quote '>', a
+ /// bullet ('-', '+', '*') or an ordered-list number followed by '.' or ')'.
+ ///
+ /// `indent` is the indentation of the mark. A mark at or beyond
+ /// `code_indent_offset` is never recognized. On success the mark's
+ /// character and indentation are stored in `container`, `end.*` is set
+ /// just past the mark and true is returned. On failure `end.*` is left
+ /// untouched, but `container.start` may already have been overwritten.
+ pub fn isContainerMark(this: *MDParser, indent: u32, beg: u32, end: *u32, container: *Container) bool {
+     var off = beg;
+
+     if (off >= this.size or indent >= this.code_indent_offset)
+         return false;
+
+     // Block quote mark.
+     if (this.charAt(off) == '>') {
+         off += 1;
+         container.ch = '>';
+         container.is_loose = false;
+         container.is_task = false;
+         container.mark_indent = indent;
+         container.contents_indent = indent + 1;
+         end.* = off;
+         return true;
+     }
+
+     // List item bullet mark: must be followed by a blank, a newline or the
+     // end of input.
+     if (this.isAnyOf(off, "-+*") and (off + 1 >= this.size or isBlank(this.charAt(off + 1)) or this.isNewline(off + 1))) {
+         container.ch = this.charAt(off);
+         container.is_loose = false;
+         container.is_task = false;
+         container.mark_indent = indent;
+         container.contents_indent = indent + 1;
+         end.* = off + 1;
+         return true;
+     }
+
+     // Ordered list item mark: at most 9 digits.
+     const max_end: u32 = if (off + 9 < this.size) off + 9 else this.size;
+     container.start = 0;
+     while (off < max_end and std.ascii.isDigit(this.charAt(off))) {
+         container.start = container.start * 10 + (this.charAt(off) - '0');
+         off += 1;
+     }
+
+     if (off > beg and
+         off < this.size and
+         this.isAnyOf2(off, '.', ')') and
+         (off + 1 >= this.size or
+         isBlank(this.charAt(off + 1)) or
+         this.isNewline(off + 1)))
+     {
+         container.ch = this.charAt(off);
+         container.is_loose = false;
+         container.is_task = false;
+         container.mark_indent = indent;
+         container.contents_indent = indent + off - beg + 1;
+         end.* = off + 1;
+         return true;
+     }
+
+     return false;
+ }
+
+ /// Analyzes the line that starts at offset `beg`.
+ ///
+ /// Determines which open containers the line still belongs to, recognizes
+ /// new container marks, classifies the line, and fills `line` with its
+ /// kind, content range, remaining indentation and kind-specific data.
+ /// `pivot_line` is the analysis of the line that currently determines the
+ /// block being built; it is only read. `end.*` receives the offset just
+ /// past the line's terminator.
+ ///
+ /// Side effects: updates the list-loosening bookkeeping, may flag a list
+ /// block as loose, and leaves / enters containers as needed.
+ /// Errors from the container helpers are propagated.
+ fn analyzeLine(this: *MDParser, beg: u32, end: *u32, pivot_line: *const Line.Analysis, line: *Line.Analysis) !void {
+     // Parameters are immutable. When a container mark starts a fresh block
+     // the pivot is re-pointed at the shared blank analysis; the caller's
+     // pivot is never written through.
+     var pivot: *const Line.Analysis = pivot_line;
+     var off = beg;
+     var hr_killer: u32 = 0;
+     const prev_line_has_list_loosening_effect = this.last_line_has_list_loosening_effect;
+     var container = Container{};
+     var total_indent: u32 = 0;
+     var n_parents: u32 = 0;
+     var n_brothers: u32 = 0;
+     var n_children: u32 = 0;
+
+     // Measure the leading indentation first; every check below relies on
+     // `line.indent` and `line.beg` describing this line.
+     line.indent = this.getIndent(total_indent, off, &off);
+     total_indent += line.indent;
+     line.beg = off;
+
+     // Given the indentation and block quote marks '>', determine how many of
+     // the current containers are our parents.
+     while (n_parents < this.containers.len) {
+         const c = &this.containers.ptr[n_parents];
+
+         if (c.ch == '>' and line.indent < this.code_indent_offset and off < this.size and this.charAt(off) == '>') {
+             // Block quote mark.
+             off += 1;
+             total_indent += 1;
+             line.indent = this.getIndent(total_indent, off, &off);
+             total_indent += line.indent;
+
+             // The optional 1st space after '>' is part of the block quote mark.
+             line.indent -|= 1;
+             line.beg = off;
+         } else if (c.ch != '>' and line.indent >= c.contents_indent) {
+             // List.
+             line.indent -= c.contents_indent;
+         } else {
+             break;
+         }
+
+         n_parents += 1;
+     }
+
+     if (off >= this.size or this.isNewline(off)) {
+         // Blank line does not need any real indentation to be nested inside
+         // a list, so every remaining list container stays a parent.
+         if (n_brothers + n_children == 0) {
+             while (n_parents < this.containers.len and this.containers.ptr[n_parents].ch != '>') {
+                 n_parents += 1;
+             }
+         }
+     }
+
+     while (true) {
+         // Check whether we are fenced code continuation.
+         if (pivot.tag == .fenced_code) {
+             line.beg = off;
+
+             // We are another fenced code line unless we are the closing
+             // fence, which we transform into a blank line.
+             if (line.indent < this.code_indent_offset) {
+                 if (this.isClosingCodeFence(this.charAt(pivot.beg), off, &off)) {
+                     line.tag = .blank;
+                     this.last_line_has_list_loosening_effect = false;
+                     break;
+                 }
+             }
+
+             // Change indentation accordingly to the initial code fence.
+             if (n_parents == this.containers.len) {
+                 line.indent -|= pivot.indent;
+                 line.tag = .fenced_code;
+                 break;
+             }
+         }
+
+         // TODO: HTML block continuation is not handled yet.
+
+         // Check for blank line.
+         if (off >= this.size or this.isNewline(off)) {
+             if (pivot.tag == .indented_code and n_parents == this.containers.len) {
+                 line.tag = .indented_code;
+                 line.indent -|= this.code_indent_offset;
+                 this.last_line_has_list_loosening_effect = false;
+             } else {
+                 line.tag = .blank;
+                 this.last_line_has_list_loosening_effect = n_parents > 0 and
+                     n_brothers + n_children == 0 and
+                     this.containers.ptr[n_parents - 1].ch != '>';
+
+                 // See https://github.com/mity/md4c/issues/6
+                 //
+                 // This ugly checking tests we are in (yet empty) list item but
+                 // not its very first line (i.e. not the line with the list
+                 // item mark).
+                 //
+                 // If we are such a blank line, then any following non-blank
+                 // line which would be part of the list item actually has to
+                 // end the list because according to the specification, "a list
+                 // item can begin with at most one blank line."
+                 if (n_parents > 0 and this.containers.ptr[n_parents - 1].ch != '>' and n_brothers + n_children == 0 and this.current_block == null and this.blocks.len > 0) {
+                     const top_block = this.blocks.last().?;
+                     if (top_block.tag == .li) {
+                         this.last_list_item_starts_with_two_blank_lines = true;
+                     }
+                 }
+             }
+             break;
+         } else {
+             // This is the 2nd half of the hack. If the flag is set (i.e. there
+             // was a 2nd blank line at the beginning of the list item) and if
+             // we would otherwise still belong to the list item, we enforce
+             // the end of the list.
+             if (this.last_list_item_starts_with_two_blank_lines) {
+                 if (n_parents > 0 and
+                     this.containers.ptr[n_parents - 1].ch != '>' and
+                     n_brothers + n_children == 0 and
+                     this.current_block == null and this.blocks.len > 1)
+                 {
+                     const top_block = this.blocks.last().?;
+                     if (top_block.tag == .li) {
+                         n_parents -|= 1;
+                     }
+                 }
+                 // The hack applies to one line only.
+                 this.last_list_item_starts_with_two_blank_lines = false;
+             }
+             this.last_line_has_list_loosening_effect = false;
+         }
+
+         // Check whether we are Setext underline.
+         if (line.indent < this.code_indent_offset and
+             pivot.tag == .text and
+             off < this.size and
+             this.isAnyOf2(off, '=', '-') and
+             n_parents == this.containers.len)
+         {
+             var level: u4 = 0;
+             if (this.isSetextUnderline(off, &off, &level)) {
+                 line.tag = .setext_underline;
+                 line.data = level;
+                 break;
+             }
+         }
+
+         // Check for a thematic break line.
+         if (line.indent < this.code_indent_offset and off < this.size and off >= hr_killer and this.isAnyOf(off, "-_*")) {
+             if (this.isHRLine(off, &off, &hr_killer)) {
+                 line.tag = .hr;
+                 break;
+             }
+         }
+
+         // Check for "brother" container. I.e. whether we are another list item
+         // in already started list.
+         if (n_parents < this.containers.len and n_brothers + n_children == 0) {
+             var tmp: u32 = undefined;
+
+             if (this.isContainerMark(line.indent, off, &tmp, &container) and
+                 isContainerCompatible(&this.containers.ptr[n_parents], &container))
+             {
+                 pivot = &Line.Analysis.blank;
+                 off = tmp;
+
+                 total_indent += container.contents_indent - container.mark_indent;
+                 line.indent = this.getIndent(total_indent, off, &off);
+                 total_indent += line.indent;
+                 line.beg = off;
+
+                 // Some of the following whitespace actually still belongs to the mark.
+                 if (off >= this.size or this.isNewline(off)) {
+                     container.contents_indent += 1;
+                 } else if (line.indent <= this.code_indent_offset) {
+                     container.contents_indent += line.indent;
+                     line.indent = 0;
+                 } else {
+                     container.contents_indent += 1;
+                     line.indent -= 1;
+                 }
+
+                 this.containers.ptr[n_parents].mark_indent = container.mark_indent;
+                 this.containers.ptr[n_parents].contents_indent = container.contents_indent;
+                 n_brothers += 1;
+                 continue;
+             }
+         }
+
+         // Check for indented code.
+         // Note: indented code block cannot interrupt a paragraph.
+         if (line.indent >= this.code_indent_offset and
+             (pivot.tag == .blank or
+             pivot.tag == .indented_code))
+         {
+             line.tag = .indented_code;
+             std.debug.assert(line.indent >= this.code_indent_offset);
+             line.indent -|= this.code_indent_offset;
+             line.data = 0;
+             break;
+         }
+
+         // Check for start of a new container block.
+         if (line.indent < this.code_indent_offset and
+             this.isContainerMark(line.indent, off, &off, &container))
+         {
+             if (pivot.tag == .text and
+                 n_parents == this.containers.len and
+                 (off >= this.size or this.isNewline(off)) and
+                 container.ch != '>')
+             {
+                 // Noop. List mark followed by a blank line cannot interrupt a paragraph.
+             } else if (pivot.tag == .text and
+                 n_parents == this.containers.len and
+                 isAnyOf2_(container.ch, '.', ')') and
+                 container.start != 1)
+             {
+                 // Noop. Ordered list cannot interrupt a paragraph unless the start index is 1.
+             } else {
+                 total_indent += container.contents_indent - container.mark_indent;
+                 line.indent = this.getIndent(total_indent, off, &off);
+                 total_indent += line.indent;
+
+                 line.beg = off;
+                 line.data = container.ch;
+
+                 // Some of the following whitespace actually still belongs to the mark.
+                 if (off >= this.size or this.isNewline(off)) {
+                     container.contents_indent += 1;
+                 } else if (line.indent <= this.code_indent_offset) {
+                     container.contents_indent += line.indent;
+                     line.indent = 0;
+                 } else {
+                     container.contents_indent += 1;
+                     line.indent -= 1;
+                 }
+
+                 if (n_brothers + n_children == 0) {
+                     pivot = &Line.Analysis.blank;
+                 }
+
+                 if (n_children == 0) {
+                     try this.leaveChildContainers(n_parents + n_brothers);
+                 }
+
+                 n_children += 1;
+                 try this.pushContainer(container);
+                 continue;
+             }
+         }
+
+         // Check whether we are table continuation.
+         if (pivot.tag == .table and n_parents == this.containers.len) {
+             line.tag = .table;
+             break;
+         }
+
+         // Check for ATX header.
+         if (line.indent < this.code_indent_offset and off < this.size and this.charAt(off) == '#') {
+             var level: u4 = 0;
+             if (this.isATXHeaderLine(off, &line.beg, &off, &level)) {
+                 line.tag = .atx_header;
+                 line.data = level;
+                 break;
+             }
+         }
+
+         // Check whether we are starting code fence.
+         if (off < this.size and this.isAnyOf2(off, '`', '~')) {
+             if (this.isOpeningCodeFence(off, &off)) {
+                 line.tag = .fenced_code;
+                 line.data = 1;
+                 break;
+             }
+         }
+
+         // Check for start of raw HTML block.
+         if (off < this.size and !this.flags.contains(.no_html_blocks) and this.charAt(off) == '<') {
+             // TODO: raw HTML block detection is not implemented yet.
+         }
+
+         // Check for table underline.
+         if (this.flags.contains(.tables) and pivot.tag == .text and off < this.size and this.isAnyOf(off, "|-:") and n_parents == this.containers.len) {
+             var col_count: u32 = undefined;
+
+             if (this.current_block != null and this.current_block.?.line_count == 1 and this.isTableUnderline(off, &off, &col_count)) {
+                 line.data = col_count;
+                 line.tag = .table_underline;
+                 break;
+             }
+         }
+
+         // By default, we are normal text line.
+         line.tag = .text;
+         if (pivot.tag == .text and n_brothers + n_children == 0) {
+             // Lazy continuation.
+             n_parents = this.containers.len;
+         }
+
+         // Check for task mark.
+         if (this.flags.contains(.tasklists) and
+             n_brothers + n_children > 0 and
+             off < this.size and
+             isCharAnyOf(this.containers.last().?.ch, "-+*.)"))
+         {
+             var tmp: u32 = off;
+
+             while (tmp < this.size and tmp < off + 3 and isBlank(this.charAt(tmp))) {
+                 tmp += 1;
+             }
+
+             if ((tmp + 2 < this.size and
+                 this.charAt(tmp) == '[' and
+                 this.isAnyOf(tmp + 1, "xX ") and
+                 this.charAt(tmp + 2) == ']') and
+                 (tmp + 3 == this.size or
+                 isBlank(this.charAt(tmp + 3)) or
+                 this.isNewline(tmp + 3)))
+             {
+                 var task_container: *Container = if (n_children > 0) this.containers.last().? else &container;
+                 task_container.is_task = true;
+                 task_container.task_mark_off = tmp + 1;
+                 off = tmp + 3;
+                 while (off < this.size and isWhitespace(this.charAt(off))) {
+                     off += 1;
+                 }
+                 // Also when the mark ends the input: the mark itself must
+                 // not be left inside the line's content.
+                 line.beg = off;
+             }
+         }
+
+         break;
+     }
+
+     // Scan for end of the line. Unrolled by four: advance only while none
+     // of the next four bytes is a line terminator.
+     while (off + 3 < this.size and
+         !this.isNewline(off) and !this.isNewline(off + 1) and
+         !this.isNewline(off + 2) and !this.isNewline(off + 3))
+     {
+         off += 4;
+     }
+
+     while (off < this.size and !this.isNewline(off)) {
+         off += 1;
+     }
+
+     // Set end of line.
+     line.end = off;
+
+     // But for ATX header, we should exclude the optional trailing mark.
+     if (line.tag == .atx_header) {
+         var tmp = line.end;
+         while (tmp > line.beg and this.charAt(tmp - 1) == ' ') {
+             tmp -= 1;
+         }
+
+         while (tmp > line.beg and this.charAt(tmp - 1) == '#') {
+             tmp -= 1;
+         }
+
+         if (tmp == line.beg or this.charAt(tmp - 1) == ' ' or this.flags.contains(.permissive_atxheaders)) {
+             line.end = tmp;
+         }
+     }
+
+     // Trim trailing spaces.
+     switch (line.tag) {
+         .indented_code, .fenced_code => {},
+         else => {
+             while (line.end > line.beg and this.charAt(line.end - 1) == ' ') {
+                 line.end -= 1;
+             }
+         },
+     }
+
+     // Eat also the new line.
+     if (off < this.size and this.charAt(off) == '\r') {
+         off += 1;
+     }
+
+     if (off < this.size and this.charAt(off) == '\n') {
+         off += 1;
+     }
+
+     end.* = off;
+
+     // If we belong to a list after seeing a blank line, the list is loose.
+     if (prev_line_has_list_loosening_effect and line.tag != .blank and n_parents + n_brothers > 0) {
+         const c = &this.containers.ptr[n_parents + n_brothers - 1];
+         if (c.ch != '>') {
+             this.blocks.ptr[c.block_index].flags.insert(.loose_list);
+         }
+     }
+
+     // Leave any containers we are not part of anymore.
+     if (n_children == 0 and n_parents + n_brothers < this.containers.len) {
+         try this.leaveChildContainers(n_parents + n_brothers);
+     }
+
+     // Enter any container we found a mark for: close the previous list
+     // item and open the new one.
+     if (n_brothers > 0) {
+         std.debug.assert(n_brothers == 1);
+         try this.pushContainerBytes(
+             Block.Tag.li,
+             this.containers.ptr[n_parents].task_mark_off,
+             if (this.containers.ptr[n_parents].is_task) this.charAt(this.containers.ptr[n_parents].task_mark_off) else 0,
+             Block.Flags.container_closer,
+         );
+         try this.pushContainerBytes(
+             Block.Tag.li,
+             container.task_mark_off,
+             if (container.is_task) this.charAt(container.task_mark_off) else 0,
+             Block.Flags.container_opener,
+         );
+         this.containers.ptr[n_parents].is_task = container.is_task;
+         this.containers.ptr[n_parents].task_mark_off = container.task_mark_off;
+     }
+
+     if (n_children > 0) {
+         try this.enterChildContainers(n_children);
+     }
+ }
+ /// Feeds one analyzed line into the block builder: depending on `line.tag`
+ /// the current leaf block is ended, retagged, extended, or a new block is
+ /// started.
+ ///
+ /// `p_pivot_line` points at the caller's pivot-line pointer. It is updated to
+ /// the line that now decides the type of the block being built, or to the
+ /// shared `Line.Analysis.blank` sentinel when no block is open afterwards.
+ fn processLine(this: *MDParser, p_pivot_line: **const Line.Analysis, line: *Line.Analysis) !void {
+     const pivot_line = p_pivot_line.*;
+
+     switch (line.tag) {
+         .blank => {
+             // Blank line ends current leaf block.
+             try this.endCurrentBlock();
+             // Store the address of the sentinel, as every other arm does.
+             p_pivot_line.* = &Line.Analysis.blank;
+         },
+         .hr, .atx_header => {
+             try this.endCurrentBlock();
+
+             // Add our single-line block.
+             try this.startNewBlock(line);
+             try this.addLineIntoCurrentBlock(line);
+             try this.endCurrentBlock();
+             p_pivot_line.* = &Line.Analysis.blank;
+         },
+         // A setext underline changes the meaning of the current block and ends it.
+         .setext_underline => {
+             // The block becomes a header; endCurrentBlock() looks for
+             // `.h` combined with `.setext_header` to strip the underline.
+             this.current_block.?.tag = .h;
+             this.current_block.?.data = line.data;
+             this.current_block.?.flags.insert(.setext_header);
+             try this.addLineIntoCurrentBlock(line);
+             try this.endCurrentBlock();
+             if (this.current_block == null) {
+                 p_pivot_line.* = &Line.Analysis.blank;
+             } else {
+                 // This happens if we have consumed all the body as link ref. defs.
+                 // and downgraded the underline into start of a new paragraph block.
+                 line.tag = .text;
+                 p_pivot_line.* = line;
+             }
+         },
+         // A table underline changes the meaning of the current block.
+         .table_underline => {
+             var current_block = this.current_block.?;
+             std.debug.assert(current_block.line_count == 1);
+             current_block.tag = .table;
+             current_block.data = line.data;
+             std.debug.assert(pivot_line != &Line.Analysis.blank);
+             // The pivot line is held through a const pointer; retag it in place.
+             @intToPtr(*Line.Analysis, @ptrToInt(p_pivot_line.*)).tag = .table;
+             try this.addLineIntoCurrentBlock(line);
+         },
+         else => {
+             // The current block also ends if the line has different type.
+             if (line.tag != pivot_line.tag) {
+                 try this.endCurrentBlock();
+             }
+
+             // The current line may start a new block.
+             if (this.current_block == null) {
+                 try this.startNewBlock(line);
+                 p_pivot_line.* = line;
+             }
+
+             // In all other cases the line is just a continuation of the current block.
+             try this.addLineIntoCurrentBlock(line);
+         },
+     }
+ }
+ /// Placeholder: currently does nothing and never fails.
+ /// endCurrentBlock() calls this when the first line of the block being
+ /// closed starts a potential link reference definition.
+ /// TODO: implement consumption of link reference definitions.
+ fn consumeLinkReferenceDefinitions(this: *MDParser) !void {
+ _ = this;
+ }
+ /// Appends the span described by `analysis` to the block currently being
+ /// built and bumps that block's line count.
+ ///
+ /// `.code` and `.html` blocks store their lines in `verbatim_lines` (which
+ /// also records the indentation); every other block stores them in `lines`.
+ /// The first line added to a block records where the block's lines start.
+ /// Requires an open current block. Fails only if growing a line list fails.
+ fn addLineIntoCurrentBlock(this: *MDParser, analysis: *const Line.Analysis) !void {
+     var current_block = this.current_block.?;
+
+     switch (current_block.tag) {
+         .code, .html => {
+             if (current_block.line_count == 0) {
+                 current_block.line_offset = this.verbatim_lines.len;
+             } else {
+                 // Lines of one block must be contiguous at the end of the list.
+                 std.debug.assert(
+                     this.verbatim_lines.len == current_block.line_count + current_block.line_offset,
+                 );
+             }
+
+             try this.verbatim_lines.push(this.allocator, Line.Verbatim{
+                 .indent = analysis.indent,
+                 .line = .{
+                     .beg = analysis.beg,
+                     .end = analysis.end,
+                 },
+             });
+         },
+         else => {
+             if (current_block.line_count == 0) {
+                 current_block.line_offset = this.lines.len;
+             } else {
+                 // Lines of one block must be contiguous at the end of the list.
+                 std.debug.assert(
+                     this.lines.len == current_block.line_count + current_block.line_offset,
+                 );
+             }
+
+             // Propagate allocation failure, as the verbatim branch does.
+             try this.lines.push(this.allocator, .{ .beg = analysis.beg, .end = analysis.end });
+         },
+     }
+
+     current_block.line_count += 1;
+ }
+ /// Finishes the leaf block currently being built, if any, and marks the
+ /// parser as not building a block anymore.
+ ///
+ /// Paragraphs and setext headers whose first line starts with '[' are first
+ /// offered to consumeLinkReferenceDefinitions(). For a setext header the
+ /// trailing underline line is then dropped; if the underline is the only
+ /// line left, the block is downgraded to an ordinary paragraph.
+ fn endCurrentBlock(this: *MDParser) !void {
+     var block = this.current_block orelse return;
+
+     // Check whether there is a reference definition. (We do this here instead
+     // of while analyzing the line because a reference definition can take
+     // multiple lines.)
+     if (block.tag == .p or (block.tag == .h and block.flags.contains(.setext_header))) {
+         const lines = block.lines(this.lines);
+         // `beg` is an offset into the source; compare the character it points at.
+         if (this.charAt(lines[0].beg) == '[') {
+             try this.consumeLinkReferenceDefinitions();
+             block = this.current_block orelse return;
+         }
+     }
+
+     if (block.tag == .h and block.flags.contains(.setext_header)) {
+         if (block.line_count > 1) {
+             // Get rid of the underline.
+             if (this.lines.len == block.line_count + block.line_offset) {
+                 this.lines.len -= 1;
+             }
+             block.line_count -= 1;
+         } else {
+             // Only the underline has left after eating the ref. defs.
+             // Keep the line as beginning of a new ordinary paragraph.
+             block.tag = .p;
+         }
+     }
+
+     // Mark we are not building any block anymore.
+     this.current_block = null;
+     this.current_block_index -|= 1;
+ }
+ /// Placeholder: currently does nothing and never fails.
+ /// parse() calls this once after all lines have been processed.
+ /// TODO: build the lookup table for link reference definitions.
+ fn buildRefDefHashTable(this: *MDParser) !void {
+ _ = this;
+ }
+ /// Closes containers from the innermost outwards until only `keep` remain,
+ /// pushing the matching closer blocks for each one.
+ ///
+ /// A list container (bullet marks `-`, `+`, `*` or ordered delimiters `)`,
+ /// `.`) emits a closing `li` followed by a closing `ul`/`ol`; a block quote
+ /// (`>`) emits a closing `quote`.
+ fn leaveChildContainers(this: *MDParser, keep: u32) !void {
+     while (this.containers.len > keep) {
+         var c = this.containers.last().?;
+
+         switch (c.ch) {
+             // Ordered and bullet lists share the same closing sequence; only
+             // the tag of the list block differs.
+             ')', '.', '-', '+', '*' => {
+                 const is_ordered_list = c.ch == ')' or c.ch == '.';
+
+                 try this.pushContainerBytes(
+                     Block.Tag.li,
+                     c.task_mark_off,
+                     if (c.is_task) this.charAt(c.task_mark_off) else 0,
+                     Block.Flags.container_closer,
+                 );
+                 // processBlock() reads the list mark from `data`, so the mark
+                 // character goes there, exactly as for the opener block.
+                 try this.pushContainerBytes(
+                     if (is_ordered_list) Block.Tag.ol else Block.Tag.ul,
+                     0,
+                     c.ch,
+                     Block.Flags.container_closer,
+                 );
+             },
+             '>' => {
+                 try this.pushContainerBytes(
+                     Block.Tag.quote,
+                     0,
+                     0,
+                     Block.Flags.container_closer,
+                 );
+             },
+             else => unreachable,
+         }
+
+         this.containers.len -= 1;
+     }
+ }
+ /// Pushes opener blocks for the last `keep` containers on the container
+ /// stack, outermost first.
+ ///
+ /// A list container (bullet marks `-`, `+`, `*` or ordered delimiters `)`,
+ /// `.`) emits an opening `ul`/`ol` followed by an opening `li`; a block
+ /// quote (`>`) emits an opening `quote`.
+ fn enterChildContainers(this: *MDParser, keep: u32) !void {
+     var i: u32 = this.containers.len - keep;
+     while (i < this.containers.len) : (i += 1) {
+         var c: *Container = this.containers.ptr[i];
+
+         switch (c.ch) {
+             // Ordered and bullet lists share the same opening sequence; only
+             // the tag of the list block differs.
+             ')', '.', '-', '+', '*' => {
+                 const is_ordered_list = c.ch == ')' or c.ch == '.';
+
+                 // Remember the index of the list block so we can revisit the
+                 // block if we detect it is a loose list.
+                 try this.endCurrentBlock();
+                 c.block_index = this.blocks.len;
+
+                 try this.pushContainerBytes(
+                     if (is_ordered_list) Block.Tag.ol else Block.Tag.ul,
+                     c.start,
+                     c.ch,
+                     Block.Flags.container_opener,
+                 );
+                 try this.pushContainerBytes(
+                     Block.Tag.li,
+                     c.task_mark_off,
+                     if (c.is_task) this.charAt(c.task_mark_off) else 0,
+                     Block.Flags.container_opener,
+                 );
+             },
+             '>' => {
+                 try this.pushContainerBytes(
+                     Block.Tag.quote,
+                     0,
+                     0,
+                     Block.Flags.container_opener,
+                 );
+             },
+             else => unreachable,
+         }
+     }
+ }
+ /// Appends `container` to the container stack, growing it with the parser's
+ /// allocator. Any failure of the push is returned to the caller.
+ fn pushContainer(this: *MDParser, container: Container) !void {
+     return this.containers.push(this.allocator, container);
+ }
+
+ /// Emits the enter-block callback for a leaf (non-container) block.
+ ///
+ /// `tag` selects at compile time which detail payload accompanies the
+ /// callback: `.h` passes `block.data` truncated to `Block.Header`, `.code`
+ /// passes the result of setupFencedCodeDetail(), `.table` passes column and
+ /// row counts derived from `block.data` and `block.line_count`; every other
+ /// tag passes `void`.
+ fn processLeafBlock(this: *MDParser, comptime tag: Block.Tag, block: *Block) anyerror!void {
+     // A switch over an enum must be exhaustive. The `else` arm mirrors the
+     // `else => void{}` arm of the value switch below.
+     const BlockDetailType = comptime switch (tag) {
+         Block.Tag.h => Block.Header,
+         Block.Tag.code => Block.Code,
+         Block.Tag.table => Block.Table,
+         else => void,
+     };
+
+     // With no enclosing container the block cannot be inside a tight list;
+     // otherwise the innermost container decides.
+     const is_in_tight_list = if (this.containers.len == 0)
+         false
+     else
+         !this.containers.ptr[this.containers.len - 1].is_loose;
+
+     const detail: BlockDetailType = switch (comptime tag) {
+         Block.Tag.h => @truncate(Block.Header, block.data),
+         Block.Tag.code => try this.setupFencedCodeDetail(block),
+         Block.Tag.table => .{
+             .col_count = block.data,
+             .head_row_count = 1,
+             // Saturating: the header row and the underline are not body rows.
+             .body_row_count = block.line_count -| 2,
+         },
+         else => void{},
+     };
+
+     // Paragraphs inside a tight list are not announced to the callback.
+     if (!is_in_tight_list or comptime tag != .p) {
+         try this.mdx.onEnterBlock(block.tag, BlockDetailType, detail);
+     }
+
+     // Currently a no-op; kept as the hook for code-block cleanup.
+     defer {
+         if (comptime tag == Block.Tag.code) {}
+     }
+ }
+
+ /// Ends the current leaf block and appends a container marker block to the
+ /// block list.
+ ///
+ /// The new block gets `block_type` as its tag, `start` in `line_count`,
+ /// `data` in `data`, and `flag` inserted into its flags.
+ fn pushContainerBytes(this: *MDParser, block_type: Block.Tag, start: u32, data: u32, flag: Block.Flags) !void {
+     try this.endCurrentBlock();
+
+     // Note whether a block is still open before the list may be grown.
+     const had_open_block = this.current_block != null;
+
+     var container_block = Block{
+         .tag = block_type,
+         .line_count = start,
+         .data = data,
+     };
+     container_block.flags.insert(flag);
+     try this.blocks.push(this.allocator, container_block);
+
+     // Re-read the current block from the list after the push.
+     if (had_open_block) {
+         this.current_block = this.blocks.ptr[this.current_block_index];
+     }
+ }
+ /// Reports one recorded block to the MDX callbacks.
+ ///
+ /// Blocks without the `.container` flag are handed to processLeafBlock().
+ /// For container blocks, a closer triggers onLeaveBlock and an opener
+ /// triggers onEnterBlock. The container stack is reused here to track
+ /// whether each enclosing list level is loose: openers of lists and block
+ /// quotes add a level, the matching closers remove one.
+ fn processBlock(this: *MDParser, comptime tag: Block.Tag, block: *Block) !void {
+     const list_is_loose = block.flags.contains(.loose_list);
+
+     // Detail payload for the container tags; everything else carries none.
+     const detail: Block.Detail = switch (comptime tag) {
+         .ul => Block.Detail{
+             .ul = .{
+                 .is_tight = !list_is_loose,
+                 .mark = @truncate(u8, block.data),
+             },
+         },
+         .ol => Block.Detail{
+             .ol = .{
+                 .start = block.line_count,
+                 .is_tight = !list_is_loose,
+                 .mark_delimiter = @truncate(u8, block.data),
+             },
+         },
+         .li => Block.Detail{
+             .li = .{
+                 .is_task = block.data != 0,
+                 .task_mark = @truncate(u8, block.data),
+                 .task_mark_offset = @intCast(u32, block.line_count),
+             },
+         },
+         else => Block.Detail{ .none = .{} },
+     };
+
+     // Leaf blocks take a separate path.
+     if (!block.flags.contains(.container)) {
+         try this.processLeafBlock(tag, block);
+         return;
+     }
+
+     if (block.flags.contains(.container_closer)) {
+         switch (block.tag) {
+             .li => try this.mdx.onLeaveBlock(tag, Block.LI, detail.li),
+             .ul => try this.mdx.onLeaveBlock(tag, Block.UL, detail.ul),
+             .ol => try this.mdx.onLeaveBlock(tag, Block.OL, detail.ol),
+             else => try this.mdx.onLeaveBlock(block.tag, void, void{}),
+         }
+
+         // Lists and block quotes occupy one level of the container stack.
+         switch (block.tag) {
+             .ul, .ol, .blockquote => this.containers.len -|= 1,
+             else => {},
+         }
+     }
+
+     if (block.flags.contains(.container_opener)) {
+         switch (comptime tag) {
+             .li => try this.mdx.onEnterBlock(tag, Block.LI, detail.li),
+             .ul => try this.mdx.onEnterBlock(tag, Block.UL, detail.ul),
+             .ol => try this.mdx.onEnterBlock(tag, Block.OL, detail.ol),
+             else => try this.mdx.onEnterBlock(block.tag, void, void{}),
+         }
+
+         // Looseness of the level being opened, or null when this tag does
+         // not open a level. A block quote is always loose, so any text in
+         // it, even if nested inside a tight list item, is wrapped with
+         // <p>...</p>.
+         const new_level_is_loose: ?bool = switch (comptime tag) {
+             .ul, .ol => list_is_loose,
+             .blockquote => true,
+             else => null,
+         };
+
+         if (new_level_is_loose) |is_loose| {
+             this.containers.ptr[this.containers.len].is_loose = is_loose;
+             this.containers.len += 1;
+         }
+     }
+ }
+ /// Resets the container stack and walks every recorded block.
+ /// NOTE(review): the loop body below is empty, so no block is handed to
+ /// processBlock() from here yet — presumably unfinished; confirm.
+ fn processAllBlocks(this: *MDParser) !void {
+ _ = this;
+
+ // ctx->containers now is not needed for detection of lists and list items
+ // so we reuse it for tracking what lists are loose or tight. We rely
+ // on the fact the vector is large enough to hold the deepest nesting
+ // level of lists.
+ this.containers.len = 0;
+ var blocks = this.blocks.slice();
+ for (blocks) |*block| {}
+ }
+ /// Reports whether `container` may continue the list opened by `pivot`.
+ ///
+ /// That holds when `container` is not a block quote (block quotes have no
+ /// "items" like lists), uses the same mark character as `pivot`, and its
+ /// mark is indented no further than `pivot`'s contents.
+ fn isContainerCompatible(pivot: *const Container, container: *const Container) bool {
+     const is_block_quote = container.ch == '>';
+     const same_mark = container.ch == pivot.ch;
+     const mark_within_contents = container.mark_indent <= pivot.contents_indent;
+
+     return !is_block_quote and same_mark and mark_within_contents;
+ }
+
+ /// Checks whether the line whose first mark character sits at `beg` is a
+ /// horizontal rule: at least three copies of that character, optionally
+ /// mixed with spaces and tabs, and nothing else before the end of the line.
+ ///
+ /// On success `end` receives the offset where scanning stopped. On failure
+ /// `hr_killer` receives the offset at which the line stopped qualifying.
+ fn isHRLine(this: *MDParser, beg: u32, end: *u32, hr_killer: *u32) bool {
+     var cursor = beg + 1;
+     var mark_count: u32 = 1;
+
+     while (cursor < this.size) : (cursor += 1) {
+         const ch = this.charAt(cursor);
+         if (ch == this.charAt(beg)) {
+             mark_count += 1;
+             continue;
+         }
+         if (ch != ' ' and ch != '\t') break;
+     }
+
+     // Too few marks, or something other than a line break follows them.
+     if (mark_count < 3 or (cursor < this.size and !this.isNewline(cursor))) {
+         hr_killer.* = cursor;
+         return false;
+     }
+
+     end.* = cursor;
+     return true;
+ }
+
+ /// Checks whether the line starting at `beg` is a setext header underline:
+ /// a run of the character found at `beg`, optionally followed by spaces,
+ /// and then the end of the line.
+ ///
+ /// On success `level` is set to 1 when the character is '=' and to 2
+ /// otherwise, and `end` receives the offset where scanning stopped.
+ fn isSetextUnderline(this: *MDParser, beg: u32, end: *u32, level: *u4) bool {
+     var cursor = beg + 1;
+
+     // The run of underline characters.
+     while (cursor < this.size and this.charAt(cursor) == this.charAt(beg)) : (cursor += 1) {}
+
+     // Optionally, space(s) can follow.
+     while (cursor < this.size and this.charAt(cursor) == ' ') : (cursor += 1) {}
+
+     // But nothing more is allowed on the line.
+     const at_line_end = cursor >= this.size or this.isNewline(cursor);
+     if (!at_line_end) return false;
+
+     level.* = if (this.charAt(beg) != '=') 2 else 1;
+     end.* = cursor;
+     return true;
+ }
+
+ /// Checks whether the line whose first '#' sits at `beg` is an ATX header.
+ ///
+ /// A header has at most six '#' marks. Unless the `.permissive_atxheaders`
+ /// flag is set, the marks must be followed by a space, a tab, a line break,
+ /// or the end of input. On success `level` receives the number of marks and
+ /// both `p_beg` and `end` receive the offset just past the marks and any
+ /// spaces that follow them.
+ fn isATXHeaderLine(this: *MDParser, beg: u32, p_beg: *u32, end: *u32, level: *u4) bool {
+     var off: u32 = beg + 1;
+
+     // Stop at seven marks: that is already one too many.
+     while (off < this.size and this.charAt(off) == '#' and off - beg < 7) {
+         off += 1;
+     }
+
+     // Offsets are unsigned, so the mark count is unsigned as well.
+     const n: u32 = off - beg;
+     if (n > 6)
+         return false;
+     level.* = @intCast(u4, n);
+
+     if (!(this.flags.contains(.permissive_atxheaders)) and off < this.size and
+         this.charAt(off) != ' ' and this.charAt(off) != '\t' and !this.isNewline(off))
+         return false;
+
+     while (off < this.size and this.charAt(off) == ' ') {
+         off += 1;
+     }
+
+     p_beg.* = off;
+     end.* = off;
+
+     return true;
+ }
+
+ /// Checks whether the line starting at `beg` is a table underline, i.e. a
+ /// row of cell underlines ("-----", ":----", "----:" or ":----:") separated
+ /// by pipes, with an optional leading and trailing pipe.
+ ///
+ /// At least one pipe must be present on the line. On success
+ /// `column_column` receives the number of cells and `end` receives the
+ /// offset where scanning stopped.
+ fn isTableUnderline(this: *MDParser, beg: u32, end: *u32, column_column: *u32) bool {
+     var off = beg;
+     var found_pipe = false;
+     var col_count: u32 = 0;
+
+     // Optional leading pipe.
+     if (off < this.size and this.charAt(off) == '|') {
+         found_pipe = true;
+         off += 1;
+         while (off < this.size and isWhitespace(this.charAt(off))) {
+             off += 1;
+         }
+     }
+
+     while (true) {
+         var delimited = false;
+
+         // Cell underline ("-----", ":----", "----:" or ":----:").
+         // The leading colon is optional, so only step over it when present.
+         if (off < this.size and this.charAt(off) == ':')
+             off += 1;
+         if (off >= this.size or this.charAt(off) != '-')
+             return false;
+         while (off < this.size and this.charAt(off) == '-')
+             off += 1;
+         if (off < this.size and this.charAt(off) == ':')
+             off += 1;
+
+         col_count += 1;
+
+         // Pipe delimiter (optional at the end of line).
+         while (off < this.size and isWhitespace(this.charAt(off)))
+             off += 1;
+         if (off < this.size and this.charAt(off) == '|') {
+             delimited = true;
+             found_pipe = true;
+             off += 1;
+             while (off < this.size and isWhitespace(this.charAt(off)))
+                 off += 1;
+         }
+
+         // Success, if we reach end of line.
+         if (off >= this.size or this.isNewline(off))
+             break;
+
+         // Another cell may only follow a pipe.
+         if (!delimited)
+             return false;
+     }
+
+     if (!found_pipe)
+         return false;
+
+     column_column.* = col_count;
+     end.* = off;
+     return true;
+ }
+
+ /// Checks whether the line whose first fence character sits at `beg` opens
+ /// a fenced code block: at least three copies of that character, optional
+ /// spaces, and an optional info string running to the end of the line. A
+ /// backtick fence must not contain '`' in its info string.
+ ///
+ /// `beg` is a source offset, so it is typed `u32` like every other offset.
+ /// On success `end` receives the offset of the end of the line.
+ /// NOTE(review): this function does not record the fence length, which
+ /// isClosingCodeFence() reads from `code_fence_length` — confirm the caller
+ /// sets it.
+ fn isOpeningCodeFence(this: *MDParser, beg: u32, end: *u32) bool {
+     var off = beg;
+     const first = this.charAt(beg);
+
+     while (off < this.size and this.charAt(off) == first) {
+         off += 1;
+     }
+
+     // Fence must have at least three characters.
+     if (off - beg < 3)
+         return false;
+
+     // Optionally, space(s) can follow.
+     while (off < this.size and this.charAt(off) == ' ') {
+         off += 1;
+     }
+
+     // Optionally, an info string can follow.
+     // isNewline() takes an offset, as at every other call site.
+     while (off < this.size and !this.isNewline(off)) {
+         // Backtick-based fence must not contain '`' in the info string.
+         if (first == '`' and this.charAt(off) == '`')
+             return false;
+         off += 1;
+     }
+
+     end.* = off;
+     return true;
+ }
+
+ /// Checks whether the line starting at `beg` closes the open fenced code
+ /// block: a run of `ch` at least `code_fence_length` long, optional spaces,
+ /// and then the end of the line.
+ ///
+ /// `beg` is a source offset, so it is typed `u32` like every other offset.
+ /// `end` is set even on failure, to the offset where scanning stopped.
+ fn isClosingCodeFence(this: *MDParser, ch: u8, beg: u32, end: *u32) bool {
+     var off = beg;
+
+     // Runs on every return path, so `end` reflects how far we scanned.
+     defer {
+         end.* = off;
+     }
+
+     while (off < this.size and this.charAt(off) == ch) {
+         off += 1;
+     }
+
+     // Closing fence must be at least as long as the opening one.
+     if (off - beg < this.code_fence_length) {
+         return false;
+     }
+
+     // Optionally, space(s) can follow.
+     while (off < this.size and this.charAt(off) == ' ') {
+         off += 1;
+     }
+
+     // But nothing more is allowed on the line.
+     // isNewline() takes an offset, as at every other call site.
+     if (off < this.size and !this.isNewline(off))
+         return false;
+
+     return true;
+ }
+
+ /// Parses the whole document.
+ ///
+ /// Lines are analyzed and processed one at a time, then the last block and
+ /// all open containers are closed and the recorded blocks are processed.
+ /// The run is bracketed by enter/leave callbacks for the `.doc` block. Any
+ /// error from a step is returned to the caller.
+ pub fn parse(this: *MDParser) anyerror!void {
+     var pivot_line = &Line.Analysis.blank;
+     // Two buffers: the pivot line must stay intact while the next line is
+     // analyzed, so we alternate between them.
+     var line_buf: [2]Line.Analysis = undefined;
+     var line = &line_buf[0];
+     var offset: u32 = 0;
+
+     try this.mdx.onEnterBlock(.doc, void, void{});
+
+     const len: u32 = this.size;
+     while (offset < len) {
+         if (line == pivot_line) {
+             line = if (line == &line_buf[0]) &line_buf[1] else &line_buf[0];
+         }
+
+         try this.analyzeLine(offset, &offset, pivot_line, line);
+         try this.processLine(&pivot_line, line);
+     }
+
+     // These all return error unions; failures must reach the caller.
+     try this.endCurrentBlock();
+
+     try this.buildRefDefHashTable();
+
+     try this.leaveChildContainers(0);
+     try this.processAllBlocks();
+     try this.mdx.onLeaveBlock(.doc, void, void{});
+ }
+};
+
+pub const MDX = struct {
+     /// JavaScript parser that receives the scopes and statements built here.
+     parser: JSParser,
+     log: *logger.Log,
+     allocator: std.mem.Allocator,
+     /// Top-level statements collected by `_parse`.
+     stmts: std.ArrayListUnmanaged(js_ast.Stmt) = .{},
+     /// Assigned by `setup` and read by `source`.
+     /// NOTE(review): the field type assumes `Lexer.init` returns a `Lexer`
+     /// value — confirm against mdx_lexer.zig.
+     lexer: Lexer = undefined,
+
+     pub const Options = struct {};
+
+     /// Callback for entering a block. Currently a no-op placeholder.
+     pub fn onEnterBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void {
+         _ = tag;
+         _ = detail;
+         _ = this;
+     }
+
+     /// Callback for leaving a block. Currently a no-op placeholder.
+     pub fn onLeaveBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void {
+         _ = tag;
+         _ = detail;
+         _ = this;
+     }
+
+     /// Callback for entering an inline span. Currently a no-op placeholder.
+     pub fn onEnterSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void {
+         _ = tag;
+         _ = detail;
+         _ = this;
+     }
+
+     /// Callback for leaving an inline span. Currently a no-op placeholder.
+     pub fn onLeaveSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void {
+         _ = tag;
+         _ = detail;
+         _ = this;
+     }
+
+     /// Callback for a run of text. Currently a no-op placeholder.
+     pub fn onText(this: *MDX, tag: Text, text: []const u8) anyerror!void {
+         _ = tag;
+         _ = text;
+         _ = this;
+     }
+
+     /// Returns a pointer to the source held by the lexer.
+     pub inline fn source(p: *const MDX) *const logger.Source {
+         return &p.lexer.source;
+     }
+
+     /// Builds an `Expr` at `loc` from `t`. A pointer argument is
+     /// dereferenced and its pointee type is used.
+     pub fn e(_: *MDX, t: anytype, loc: logger.Loc) Expr {
+         const Type = @TypeOf(t);
+         if (@typeInfo(Type) == .Pointer) {
+             return Expr.init(std.meta.Child(Type), t.*, loc);
+         } else {
+             return Expr.init(Type, t, loc);
+         }
+     }
+
+     /// Builds a `Stmt` at `loc` from `t`. A pointer argument goes through
+     /// `Stmt.init` with its pointee; a value goes through `Stmt.alloc`.
+     pub fn s(_: *MDX, t: anytype, loc: logger.Loc) Stmt {
+         const Type = @TypeOf(t);
+         if (@typeInfo(Type) == .Pointer) {
+             return Stmt.init(std.meta.Child(Type), t.*, loc);
+         } else {
+             return Stmt.alloc(Type, t, loc);
+         }
+     }
+
+     /// Initializes the embedded JavaScript parser and the MDX lexer on top
+     /// of it, then stores `allocator` and `log` and resets `stmts`.
+     pub fn setup(
+         this: *MDX,
+         _options: ParserOptions,
+         log: *logger.Log,
+         source_: *const logger.Source,
+         define: *Define,
+         allocator: std.mem.Allocator,
+     ) !void {
+         try JSParser.init(
+             allocator,
+             log,
+             source_,
+             define,
+             js_lexer.Lexer.initNoAutoStep(log, source_.*, allocator),
+             _options,
+             &this.parser,
+         );
+         this.lexer = try Lexer.init(&this.parser.lexer);
+         this.allocator = allocator;
+         this.log = log;
+         this.stmts = .{};
+     }
+
+     /// Runs `_parse` and hands the collected statements to
+     /// `runVisitPassAndFinish` together with the JavaScript parser.
+     pub fn parse(this: *MDX) !js_ast.Result {
+         try this._parse();
+         return try runVisitPassAndFinish(JSParser, &this.parser, this.stmts.toOwnedSlice(this.allocator));
+     }
+
+     /// Placeholder for the pass that fills `root_children`. It currently
+     /// leaves the list untouched and returns `logger.Loc.Empty`. The
+     /// signature matches its only call site in `_parse`.
+     fn run(this: *MDX, root_children: *std.ArrayListUnmanaged(Expr)) anyerror!logger.Loc {
+         _ = this;
+         _ = root_children;
+         return logger.Loc.Empty;
+     }
+
+     /// Builds the module body: an `export default` statement whose value is
+     /// an arrow function returning a root JSX element that holds the
+     /// children produced by `run`.
+     fn _parse(this: *MDX) anyerror!void {
+         var root_children = std.ArrayListUnmanaged(Expr){};
+         var first_loc = try this.run(&root_children);
+
+         // Clamp to a non-negative start before deriving the two locations.
+         first_loc.start = @maximum(first_loc.start, 0);
+         const args_loc = first_loc;
+         first_loc.start += 1;
+         const body_loc = first_loc;
+
+         // We need to simulate a function that was parsed
+         _ = try this.parser.pushScopeForParsePass(.function_args, args_loc);
+
+         _ = try this.parser.pushScopeForParsePass(.function_body, body_loc);
+
+         const root = this.e(E.JSXElement{
+             .tag = this.e(E.JSXElement.Tag.map.get(E.JSXElement.Tag.main), body_loc),
+             .children = ExprNodeList.fromList(root_children),
+         }, body_loc);
+
+         // The arrow function body is a single `return <root>` statement.
+         var root_stmts = try this.allocator.alloc(Stmt, 1);
+         root_stmts[0] = this.s(S.Return{ .value = root }, body_loc);
+
+         try this.stmts.append(
+             this.allocator,
+
+             this.s(S.ExportDefault{
+                 .default_name = try this.parser.createDefaultName(args_loc),
+                 .value = .{
+                     .expr = this.e(E.Arrow{
+                         .body = G.FnBody{
+                             .stmts = root_stmts,
+                             .loc = body_loc,
+                         },
+                         .args = &[_]G.Arg{},
+                         .prefer_expr = true,
+                     }, args_loc),
+                 },
+             }, args_loc),
+         );
+     }
+};