...

(for levels up to 6) ///Detail: Structure h_detail. h, ///

...

///Note the text lines within code blocks are terminated with '\n' ///instead of explicit MD_TEXT_BR. code, /// Raw HTML block. This itself does not correspond to any particular HTML ///tag. The contents of it _is_ raw HTML source intended to be put ///in verbatim form to the HTML output. html, ///

...

p, /// ...

/// <table>...</table> and its contents.
        /// Detail: Structure table_detail (for table),
        ///         structure td_detail (for th and td)
        /// Note all of these are used only if extension MD_FLAG_TABLES is enabled.
        table,
        thead,
        tbody,
        tr,
        th,
        td,
    };

    /// Detail payload for an unordered list block.
    pub const UL = struct {
        tight: bool = false,
        mark: u8 = '*',
    };

    /// Detail payload for an ordered list block.
    pub const OL = struct {
        start: u32 = 0,
        tight: bool = false,
        mark: u8 = '*',
    };

    /// Detail payload for a list item block.
    pub const LI = struct {
        /// Can be non-zero only with MD_FLAG_TASKLISTS
        task: bool = false,
        /// If is_task, then one of 'x', 'X' or ' '. Undefined otherwise.
        task_mark: u8 = 'x',
        /// If is_task, then offset in the input of the char between '[' and ']'.
        task_mark_off: u32 = 0,
    };

    /// Header level.
    pub const Header = u4;

    /// Detail payload for a code block.
    pub const Code = struct {
        info: Attribute = .{},
        lang: Attribute = .{},
        /// Character used for fenced code block; or zero for indented code block.
        fence: u8 = '`',
    };

    /// Detail payload for a table block.
    pub const Table = struct {
        /// Count of columns in the table.
        column_count: u32 = 0,
        /// Count of rows in the table header (currently always 1)
        head_row_count: u32 = 1,
        /// Count of rows in the table body
        body_row_count: u32 = 0,
    };

    /// Untagged union of the list-related detail payloads.
    pub const Detail = union {
        none: void,
        ul: UL,
        ol: OL,
        li: LI,
    };

    /// Detail payload for a table cell (th and td).
    pub const TD = struct {
        alignment: Align = Align.default,
    };
};

pub const Span = struct {
    pub const Tag = enum {
        /// <em>...</em>
        em,
        /// <strong>...</strong>
        strong,
        /// <a href="xxx">...</a>
        /// Detail: Structure a_detail.
        a,
        ///

/// <img src="xxx">...
        /// Detail: Structure img_detail.
        /// Note: Image text can contain nested spans and even nested images.
        /// If rendered into ALT attribute of HTML <img> tag, it's responsibility
        /// of the parser to deal with it.
        img,
        /// <code>...</code>
        code,
        /// ~~...~~
        /// Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
        del,
        /// For recognizing inline ($) and display ($$) equations
        /// Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
        latexmath,
        latexmath_display,
        /// Wiki links
        /// Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
        wikilink,
        /// <u>...</u>
        /// Note: Recognized only when MD_FLAG_UNDERLINE is enabled.
        u,
    };

    /// Detail payload for a link span.
    pub const Link = struct {
        src: Attribute = .{},
        title: Attribute = .{},
    };

    /// Images carry the same detail as links.
    pub const Image = Link;

    /// Detail payload for a wiki link span.
    pub const Wikilink = struct {
        target: Attribute = .{},
    };
};

pub const Text = enum {
    /// Normal text.
    normal,
    /// NULL character. CommonMark requires replacing NULL character with
    /// the replacement char U+FFFD, so this allows caller to do that easily.
    nullchar,
    /// Line breaks.
    /// Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
    /// or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself.
    ///
/// <br> (hard break)
    br,
    /// '\n' in source text where it is not semantically meaningful (soft break)
    softbr,
    /// Entity.
    /// (a) Named entity, e.g. &nbsp;
    ///     (Note MD4C does not have a list of known entities.
    ///     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
    ///     treated as a named entity.)
    /// (b) Numerical entity, e.g. &#1234;
    /// (c) Hexadecimal entity, e.g. &#x12AB;
    ///
    /// As MD4C is mostly encoding agnostic, application gets the verbatim
    /// entity text into the MD_PARSER::text_callback().
    entity,
    /// Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
    /// If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
    /// '\n' for new lines. br and softbr are not sent for this
    /// kind of text.
    code,
    /// Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
    /// an inline raw HTML), then br and softbr are not used.
    /// The text contains verbatim '\n' for the new lines.
    html,
    /// Text is inside an equation. This is processed the same way as inlined code
    /// spans (`code`).
    latexmath,
};

/// Table cell alignment.
pub const Align = enum(u3) {
    default = 0,
    left = 1,
    center = 2,
    right = 3,
};

/// String attribute.
///
/// This wraps strings which are outside of a normal text flow and which are
/// propagated within various detailed structures, but which still may contain
/// string portions of different types like e.g. entities.
///
/// So, for example, lets consider this image:
///
///     ![image alt text](http://example.org/image.png 'foo &quot; bar')
///
/// The image alt text is propagated as a normal text via the MD_PARSER::text()
/// callback. However, the image title ('foo &quot; bar') is propagated as
/// MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
///
/// Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
///  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
///  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
///  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
///  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
///
/// Note that these invariants are always guaranteed:
///  -- substr_offsets[0] == 0
///  -- substr_offsets[LAST+1] == size
///  -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
///     substrings can appear. This could change only of the specification
///     changes.
///
pub const Attribute = struct {
    text: []const u8 = "",
    substring: Substring.List = .{},
};

/// One typed portion of an `Attribute`: where it starts and what kind of text it is.
pub const Substring = struct {
    offset: u32,
    tag: Text,

    pub const List = std.MultiArrayList(Substring);
    pub const ListPool = ObjectPool(List);
};

/// An inline mark candidate. `prev`/`next` default to maxInt(u32), i.e. "not linked".
pub const Mark = struct {
    position: Ref = Ref{},
    prev: u32 = std.math.maxInt(u32),
    next: u32 = std.math.maxInt(u32),
    ch: u8 = 0,
    flags: u16 = 0,

    /// Maybe closer.
    pub const potential_closer = 0x02;
    /// Maybe opener.
    pub const potential_opener = 0x01;
    /// Definitely opener.
    pub const opener = 0x04;
    /// Definitely closer.
    pub const closer = 0x08;
    /// Resolved in any definite way.
    pub const resolved = 0x10;

    /// Helper for the "rule of 3".
    pub const emph_intraword = 0x20;
    pub const emph_mod3_0 = 0x40;
    pub const emph_mod3_1 = 0x80;
    pub const emph_mod3_2 = (0x40 | 0x80);
    pub const emph_mod3_mask = (0x40 | 0x80);
    /// Distinguisher for '<', '>'.
    pub const autolink = 0x20;
    /// For permissive autolinks.
    pub const validpermissiveautolink = 0x20;
    /// For '[' to rule out invalid link labels early
    pub const hasnestedbrackets = 0x20;

    /// During analyzes of inline marks, we need to manage some "mark chains",
    /// of (yet unresolved) openers. This structure holds start/end of the chain.
    /// The chain internals are then realized through MD_MARK::prev and ::next.
pub const Chain = struct {
        head: u32 = std.math.maxInt(u32),
        tail: u32 = std.math.maxInt(u32),

        /// Fixed set of opener chains, one slot per accessor below.
        pub const List = struct {
            // The accessors index slots 0..12, so all 13 entries must be
            // initialized; repeating a single default keeps the initializer
            // count in sync with the declared array length.
            data: [13]Chain = [_]Chain{.{}} ** 13,

            pub inline fn ptr_chain(this: *List) *Chain {
                return &this.data[0];
            }
            pub inline fn tablecellboundaries(this: *List) *Chain {
                return &this.data[1];
            }
            pub inline fn asterisk_openers_extraword_mod3_0(this: *List) *Chain {
                return &this.data[2];
            }
            pub inline fn asterisk_openers_extraword_mod3_1(this: *List) *Chain {
                return &this.data[3];
            }
            pub inline fn asterisk_openers_extraword_mod3_2(this: *List) *Chain {
                return &this.data[4];
            }
            pub inline fn asterisk_openers_intraword_mod3_0(this: *List) *Chain {
                return &this.data[5];
            }
            pub inline fn asterisk_openers_intraword_mod3_1(this: *List) *Chain {
                return &this.data[6];
            }
            pub inline fn asterisk_openers_intraword_mod3_2(this: *List) *Chain {
                return &this.data[7];
            }
            pub inline fn underscore_openers(this: *List) *Chain {
                return &this.data[8];
            }
            pub inline fn tilde_openers_1(this: *List) *Chain {
                return &this.data[9];
            }
            pub inline fn tilde_openers_2(this: *List) *Chain {
                return &this.data[10];
            }
            pub inline fn bracket_openers(this: *List) *Chain {
                return &this.data[11];
            }
            pub inline fn dollar_openers(this: *List) *Chain {
                return &this.data[12];
            }
        };
    };
};

/// A line of the source, as a [beg, end) pair of byte offsets.
pub const Line = struct {
    beg: u32 = 0,
    end: u32 = 0,

    pub const Tag = enum(u32) {
        blank,
        hr,
        atx_header,
        setext_header,
        setext_underline,
        indented_code,
        fenced_code,
        html,
        text,
        table,
        table_underline,
    };

    /// Result of analyzing one line: its kind, extent, indentation and a
    /// tag-specific `data` word.
    pub const Analysis = packed struct {
        tag: Tag = Tag.blank,
        beg: u32 = 0,
        end: u32 = 0,
        indent: u32 = 0,
        data: u32 = 0,

        pub const blank = Analysis{};

        /// Byte-wise equality of two analyses.
        pub fn eql(a: Analysis, b: Analysis) bool {
            return strings.eqlLong(std.mem.asBytes(&a), std.mem.asBytes(&b), false);
        }
    };

    /// A line of a verbatim block, which additionally remembers its indent.
    pub const Verbatim = struct {
        line: Line = Line{},
        indent: u32 = 0,
    };
};

pub const MDParser = struct {
    marks: BabyList(Mark) = .{},
    chain: Mark.Chain.List = .{},
    source: logger.Source,
    flags: Flags.Set
= Flags.commonmark,
    allocator: std.mem.Allocator,
    mdx: *MDX,
    // One entry per possible byte value. A 255-entry table cannot be indexed
    // by the byte 0xFF, so the table is sized for the full u8 range.
    mark_char_map: [256]u1 = undefined,
    doc_ends_with_newline: bool = false,
    size: u32 = 0,

    lines: BabyList(Line) = .{},
    verbatim_lines: BabyList(Line.Verbatim) = .{},

    containers: BabyList(Container) = .{},
    blocks: BabyList(Block) = .{},
    current_block: ?*Block = null,
    current_block_index: u32 = 0,

    code_fence_length: u32 = 0,
    code_indent_offset: u32 = std.math.maxInt(u32),
    last_line_has_list_loosening_effect: bool = false,
    last_list_item_starts_with_two_blank_lines: bool = false,

    pub const Flags = enum {
        /// In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' '
        collapse_whitespace,
        /// Do not require space in ATX headers ( ###header )
        permissive_atxheaders,
        /// Recognize URLs as autolinks even without '<', '>'
        permissive_url_autolinks,
        /// Recognize e-mails as autolinks even without '<', '>' and 'mailto:'
        permissive_email_autolinks,
        /// Disable indented code blocks. (Only fenced code works.)
        noindented_codeblocks,
        /// Disable raw HTML blocks.
        no_html_blocks,
        /// Disable raw HTML (inline).
        no_html_spans,
        /// Enable tables extension.
        tables,
        /// Enable strikethrough extension.
        strikethrough,
        /// Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.')
        permissive_www_autolinks,
        /// Enable task list extension.
        tasklists,
        /// Enable $ and $$ containing LaTeX equations.
        latex_mathspans,
        /// Enable wiki links extension.
        wikilinks,
        /// Enable underline extension (and disables '_' for normal emphasis).
underline,

        pub const Set = std.enums.EnumSet(Flags);

        pub const permissive_autolinks = Set.init(.{ .permissive_email_autolinks = true, .permissive_url_autolinks = true });
        // NOTE(review): despite the name, this set disables raw HTML blocks and
        // spans; the name is kept because it is public.
        pub const no_email = Set.init(.{ .no_html_blocks = true, .no_html_spans = true });
        // `permissive_autolinks` is a set, not a member of `Flags`, so its
        // members are spelled out here.
        pub const github = Set.init(.{
            .tables = true,
            .permissive_email_autolinks = true,
            .permissive_url_autolinks = true,
            .strikethrough = true,
            .tasklists = true,
        });
        /// The empty flag set; used as the default for `MDParser.flags`.
        pub const commonmark = Set{};
    };

    /// Fills `mark_char_map` with 1 for every byte that may start or end an
    /// inline mark under the current `flags`, and 0 for every other byte.
    fn buildCharMap(this: *MDParser) void {
        @memset(&this.mark_char_map, 0, this.mark_char_map.len);

        // Marks that are always recognized.
        for ([_]u8{ '\\', '*', '_', '`', '&', ';', '<', '>', '[', '!', ']', 0 }) |ch| {
            this.mark_char_map[ch] = 1;
        }

        // Whitespace: space, tab, CR, LF, form feed (0xC) and vertical tab (0xB).
        for ([_]u8{ ' ', '\t', '\r', '\n', 0xC, 0xB }) |ch| {
            this.mark_char_map[ch] = 1;
        }

        if (this.flags.contains(.strikethrough)) {
            this.mark_char_map['~'] = 1;
        }

        if (this.flags.contains(.latex_mathspans)) {
            this.mark_char_map['$'] = 1;
        }

        if (this.flags.contains(.permissive_email_autolinks)) {
            this.mark_char_map['@'] = 1;
        }

        if (this.flags.contains(.permissive_url_autolinks)) {
            this.mark_char_map[':'] = 1;
        }

        if (this.flags.contains(.permissive_www_autolinks)) {
            this.mark_char_map['.'] = 1;
        }

        // Table cells are delimited by '|' ('.' belongs to WWW autolinks above).
        if (this.flags.contains(.tables)) {
            this.mark_char_map['|'] = 1;
        }
    }

    /// Creates a parser over `source` with the given `flags`, builds the mark
    /// character map and records whether the document ends with '\n'.
    /// The source length is truncated to u32.
    pub fn init(allocator: std.mem.Allocator, source: logger.Source, flags: Flags.Set, mdx: *MDX) MDParser {
        var parser = MDParser{
            .allocator = allocator,
            .source = source,
            .flags = flags,
            .mdx = mdx,
            .size = @truncate(u32, source.contents.len),
        };
        parser.buildCharMap();
        // Guard against empty input before looking at the last byte.
        parser.doc_ends_with_newline = source.contents.len > 0 and source.contents[source.contents.len - 1] == '\n';
        return parser;
    }

    /// Pushes a new leaf block whose tag is derived from the tag of `line`.
    fn startNewBlock(this: *MDParser, line: *const
Line.Analysis) !void {
        try this.blocks.push(
            this.allocator,
            Block{
                .tag = switch (line.tag) {
                    .hr => Block.Tag.hr,
                    .atx_header, .setext_header => Block.Tag.h,
                    .fenced_code, .indented_code => Block.Tag.code,
                    .text => Block.Tag.p,
                    .html => Block.Tag.html,
                    else => unreachable,
                },
                .data = line.data,
                .line_count = 0,
                // Verbatim blocks index into `verbatim_lines`, all others into `lines`.
                .line_offset = switch (line.tag) {
                    .indented_code, .html, .fenced_code => this.verbatim_lines.len,
                    else => this.lines.len,
                },
            },
        );
        // The freshly pushed block becomes the block under construction;
        // callers unwrap `current_block` right after starting a block.
        this.current_block = this.blocks.last();
    }

    /// Byte of the source at `index`.
    inline fn charAt(this: *const MDParser, index: u32) u8 {
        return this.source.contents[index];
    }

    /// True if the byte at `index` is '\n' or '\r'.
    inline fn isNewline(this: *const MDParser, index: u32) bool {
        return switch (this.charAt(index)) {
            '\n', '\r' => true,
            else => false,
        };
    }

    /// True if the byte at `index` equals `first` or `second`.
    inline fn isAnyOf2(this: *const MDParser, index: u32, comptime first: u8, comptime second: u8) bool {
        return isAnyOf2_(this.charAt(index), first, second);
    }

    /// True if `char` equals `first` or `second`.
    inline fn isAnyOf2_(char: u8, comptime first: u8, comptime second: u8) bool {
        return switch (char) {
            first, second => true,
            else => false,
        };
    }

    /// True if the byte at `index` is one of `values`.
    inline fn isAnyOf(this: *const MDParser, index: u32, comptime values: []const u8) bool {
        return isCharAnyOf(this.charAt(index), values);
    }

    /// True if `char` is one of `values`.
    inline fn isCharAnyOf(char: u8, comptime values: []const u8) bool {
        inline for (values) |val| {
            if (val == char) return true;
        }
        return false;
    }

    /// True for space and tab.
    inline fn isBlank(char: u8) bool {
        return isCharAnyOf(char, &[_]u8{ ' ', '\t' });
    }

    /// True for space, tab, form feed (0xC) and vertical tab (0xB).
    inline fn isWhitespace(char: u8) bool {
        return isCharAnyOf(char, &[_]u8{ ' ', '\t', 0xC, 0xB });
    }

    /// Measures the indentation made of blanks starting at `beg`.
    /// `total_indent` is the column reached so far; a tab advances to the next
    /// multiple of 4. Stores the offset of the first non-blank byte in `end`
    /// and returns the indentation added on top of `total_indent`.
    pub fn getIndent(this: *MDParser, total_indent: u32, beg: u32, end: *u32) u32 {
        var off = beg;
        var indent = total_indent;
        while (off < this.size and isBlank(this.charAt(off))) {
            if (this.charAt(off) == '\t') {
                // Bitwise not needs a fixed-width operand; `~3` on a
                // comptime_int does not compile.
                indent = (indent + 4) & ~@as(u32, 3);
            } else {
                indent += 1;
            }
            off += 1;
        }
        end.* = off;
        return indent - total_indent;
    }

    /// Checks whether a container mark (block quote '>', bullet or ordered
    /// list item) starts at `beg`. On success fills `container`, stores the
    /// offset just past the mark in `end` and returns true.
    pub fn isContainerMark(this: *MDParser, indent: u32, beg: u32, end: *u32, container: *Container) bool {
        var off = beg;
        var max_end: u32 = undefined;

        if (off >= this.size or indent >= this.code_indent_offset)
            return false;

        if
(this.charAt(off) == '>') {
            off += 1;
            container.ch = '>';
            container.is_loose = false;
            container.is_task = false;
            container.mark_indent = indent;
            container.contents_indent = indent + 1;
            end.* = off;
            return true;
        }

        // Check for list item bullet mark.
        if (this.isAnyOf(off, "-+*") and (off + 1 >= this.size or isBlank(this.charAt(off + 1)) or this.isNewline(off + 1))) {
            container.ch = this.charAt(off);
            container.is_loose = false;
            container.is_task = false;
            container.mark_indent = indent;
            container.contents_indent = indent + 1;
            end.* = off + 1;
            return true;
        }

        // Check for ordered list item marks (at most 9 digits).
        max_end = @minimum(off + 9, this.size);
        container.start = 0;
        while (off < max_end and std.ascii.isDigit(this.charAt(off))) {
            container.start = container.start * 10 + (this.charAt(off) - '0');
            off += 1;
        }

        // The delimiter must be followed by end of input, a blank or a newline.
        // `isBlank` is a free function taking a byte, and the newline test is
        // a separate alternative, not part of its argument.
        if (off > beg and off < this.size and this.isAnyOf2(off, '.', ')') and
            (off + 1 >= this.size or isBlank(this.charAt(off + 1)) or this.isNewline(off + 1)))
        {
            container.ch = this.charAt(off);
            container.is_loose = false;
            container.is_task = false;
            container.mark_indent = indent;
            container.contents_indent = indent + off - beg + 1;
            end.* = off + 1;
            return true;
        }

        return false;
    }

    /// Analyzes the line starting at `beg` relative to `pivot_line`, filling
    /// `line` and storing the offset of the next line in `end`. Also opens and
    /// closes containers as needed.
    fn analyzeLine(this: *MDParser, beg: u32, end: *u32, pivot_line: *const Line.Analysis, line: *Line.Analysis) !void {
        var off = beg;
        var hr_killer: u32 = 0;
        const prev_line_has_list_loosening_effect = this.last_line_has_list_loosening_effect;
        var container = Container{};

        var total_indent: u32 = 0;
        var n_parents: u32 = 0;
        var n_brothers: u32 = 0;
        var n_children: u32 = 0;

        // Given the indentation and block quote marks '>', determine how many of
        // the current containers are our parents.
while (n_parents < this.containers.len) {
            // Take the address of the element; pointer arithmetic on `ptr`
            // yields a many-item pointer, not `*Container`.
            const c: *Container = &this.containers.ptr[n_parents];

            if (c.ch == '>' and line.indent < this.code_indent_offset and off < this.size and this.charAt(off) == '>') {
                // Block quote mark.
                off += 1;
                total_indent += 1;
                line.indent = this.getIndent(total_indent, off, &off);
                total_indent += line.indent;

                // The optional 1st space after '>' is part of the block quote mark.
                line.indent -|= 1;

                line.beg = off;
            } else if (c.ch != '>' and line.indent >= c.contents_indent) {
                // List.
                line.indent -|= c.contents_indent;
            } else {
                break;
            }

            n_parents += 1;
        }

        if (off >= this.size or this.isNewline(off)) {
            // Blank line does not need any real indentation to be nested inside a list
            if (n_brothers + n_children == 0) {
                // Only list containers are implicitly continued; a block quote
                // needs its '>' mark.
                while (n_parents < this.containers.len and this.containers.ptr[n_parents].ch != '>') {
                    n_parents += 1;
                }
            }
        }

        while (true) {
            switch (pivot_line.tag) {
                .fenced_code => {
                    // Check whether we are fenced code continuation.
                    line.beg = off;

                    // We are another MD_LINE_FENCEDCODE unless we are closing fence
                    // which we transform into MD_LINE_BLANK.
                    if (line.indent < this.code_indent_offset) {
                        if (this.isClosingCodeFence(this.charAt(pivot_line.beg), off, &off)) {
                            line.tag = .blank;
                            this.last_line_has_list_loosening_effect = false;
                            break;
                        }
                    }

                    // Change indentation accordingly to the initial code fence.
                    if (n_parents == this.containers.len) {
                        line.indent -|= pivot_line.indent;
                        line.tag = .fenced_code;
                        break;
                    }
                },
                .indented_code => {},
                .text => {},
                .html => {},
                else => {},
            }

            // Check for blank line.
if (off >= this.size or this.isNewline(off)) {
                if (pivot_line.tag == .indented_code and n_parents == this.containers.len) {
                    // Blank lines inside indented code.
                    line.tag = .indented_code;
                    line.indent -|= this.code_indent_offset;
                    this.last_line_has_list_loosening_effect = false;
                } else {
                    line.tag = .blank;
                    this.last_line_has_list_loosening_effect = n_parents > 0 and
                        n_brothers + n_children == 0 and
                        this.containers.ptr[n_parents - 1].ch != '>';

                    // See https://github.com/mity/md4c/issues/6
                    //
                    // This ugly checking tests we are in (yet empty) list item but
                    // not its very first line (i.e. not the line with the list
                    // item mark).
                    //
                    // If we are such a blank line, then any following non-blank
                    // line which would be part of the list item actually has to
                    // end the list because according to the specification, "a list
                    // item can begin with at most one blank line."
                    //
                    if (n_parents > 0 and this.containers.ptr[n_parents - 1].ch != '>' and n_brothers + n_children == 0 and this.current_block == null and this.blocks.len > 0) {
                        const top_block = this.blocks.last().?;
                        if (top_block.tag == .li) {
                            this.last_list_item_starts_with_two_blank_lines = true;
                        }
                    }
                }
                break;
            } else {
                // This is the 2nd half of the hack. If the flag is set (i.e. there
                // was a 2nd blank line at the beginning of the list item) and if
                // we would otherwise still belong to the list item, we enforce
                // the end of the list.
                this.last_line_has_list_loosening_effect = false;
                if (this.last_list_item_starts_with_two_blank_lines) {
                    if (n_parents > 0 and this.containers.ptr[n_parents - 1].ch != '>' and n_brothers + n_children == 0 and this.current_block == null and this.blocks.len > 1) {
                        const top = this.blocks.last().?;
                        if (top.tag == .li) {
                            n_parents -|= 1;
                        }
                    }
                    // The flag has been consumed; reset it so it does not end
                    // every following list item as well.
                    this.last_list_item_starts_with_two_blank_lines = false;
                }
            }

            // Check whether we are Setext underline.
if (line.indent < this.code_indent_offset and pivot_line.tag == .text and off < this.size and this.isAnyOf2(off, '=', '-') and n_parents == this.containers.len) {
                var level: u4 = 0;
                if (this.isSetextUnderline(off, &off, &level)) {
                    line.tag = .setext_underline;
                    line.data = level;
                    break;
                }
            }

            // Check for a thematic break line.
            // `hr_killer` is passed to isHRLine and lines starting before it are not re-tested.
            if (line.indent < this.code_indent_offset and off < this.size and off >= hr_killer and this.isAnyOf(off, "-_*")) {
                if (this.isHRLine(off, &off, &hr_killer)) {
                    line.tag = .hr;
                    break;
                }
            }

            // Check for "brother" container. I.e. whether we are another list item
            // in already started list.
            if (n_parents < this.containers.len and n_brothers + n_children == 0) {
                var tmp: u32 = undefined;
                if (this.isContainerMark(line.indent, off, &tmp, &container) and isContainerCompatible(&this.containers.ptr[n_parents], &container)) {
                    // NOTE(review): `pivot_line` is declared `*const Line.Analysis`, so
                    // this writes through a const pointer. The intent looks like
                    // "from now on treat the pivot as a blank line" — confirm and
                    // rebind a local pointer instead of storing through it.
                    pivot_line.* = Line.Analysis.blank;
                    off = tmp;

                    // Re-measure the indentation after the mark.
                    total_indent += container.contents_indent - container.mark_indent;
                    line.indent = this.getIndent(total_indent, off, &off);
                    total_indent += line.indent;
                    line.beg = off;

                    // Some of the following whitespace actually still belongs to the mark.
if (off >= this.size or this.isNewline(off)) {
                        container.contents_indent += 1;
                    } else if (line.indent <= this.code_indent_offset) {
                        container.contents_indent += line.indent;
                        line.indent = 0;
                    } else {
                        container.contents_indent += 1;
                        line.indent -= 1;
                    }

                    this.containers.ptr[n_parents].mark_indent = container.mark_indent;
                    this.containers.ptr[n_parents].contents_indent = container.contents_indent;
                    n_brothers += 1;
                    continue;
                }
            }

            // Check for indented code
            // Note: indented code block cannot interrupt a paragraph
            if (line.indent >= this.code_indent_offset and (pivot_line.tag == .blank or pivot_line.tag == .indented_code)) {
                line.tag = .indented_code;
                std.debug.assert(line.indent >= this.code_indent_offset);
                line.indent -|= this.code_indent_offset;
                line.data = 0;
                break;
            }

            // Check for start of a new container block
            if (line.indent < this.code_indent_offset and this.isContainerMark(line.indent, off, &off, &container)) {
                if (pivot_line.tag == .text and n_parents == this.containers.len and (off >= this.size or this.isNewline(off)) and container.ch != '>') {
                    // Noop. List mark followed by a blank line cannot interrupt a paragraph.
                } else if (pivot_line.tag == .text and n_parents == this.containers.len and isAnyOf2_(container.ch, '.', ')') and container.start != 1) {
                    // Noop. Ordered list cannot interrupt a paragraph unless the start index is 1.
                } else {
                    total_indent += container.contents_indent - container.mark_indent;
                    line.indent = this.getIndent(total_indent, off, &off);
                    total_indent += line.indent;

                    line.beg = off;
                    line.data = container.ch;

                    // Some of the following whitespace actually still belongs to the mark.
if (off >= this.size or this.isNewline(off)) {
                        container.contents_indent += 1;
                    } else if (line.indent <= this.code_indent_offset) {
                        container.contents_indent += line.indent;
                        line.indent = 0;
                    } else {
                        container.contents_indent += 1;
                        line.indent -= 1;
                    }

                    if (n_brothers + n_children == 0) {
                        // NOTE(review): `pivot_line` is `*const Line.Analysis`; this
                        // stores through a const pointer. Confirm the intent and
                        // rebind a local pointer instead.
                        pivot_line.* = Line.Analysis.blank;
                    }

                    if (n_children == 0) {
                        try this.leaveChildContainers(n_parents + n_brothers);
                    }

                    n_children += 1;
                    try this.pushContainer(container);
                    continue;
                }
            }

            // Check whether we are table continuation.
            if (pivot_line.tag == .table and n_parents == this.containers.len) {
                line.tag = .table;
                break;
            }

            // Check for ATX header.
            if (line.indent < this.code_indent_offset and off < this.size and this.charAt(off) == '#') {
                var level: u4 = 0;
                if (this.isATXHeaderLine(off, &line.beg, &off, &level)) {
                    line.tag = .atx_header;
                    line.data = level;
                    break;
                }
            }

            // Check whether we are starting code fence.
            if (off < this.size and this.isAnyOf2(off, '`', '~')) {
                if (this.isOpeningCodeFence(off, &off)) {
                    line.tag = .fenced_code;
                    line.data = 1;
                    break;
                }
            }

            // Check for start of raw HTML block. (Not implemented yet: empty body.)
            if (off < this.size and !this.flags.contains(.no_html_blocks) and this.charAt(off) == '<') {}

            // Check for table underline.
            if (this.flags.contains(.tables) and pivot_line.tag == .text and off < this.size and this.isAnyOf(off, "|-:") and n_parents == this.containers.len) {
                var col_count: u32 = undefined;

                if (this.current_block != null and this.current_block.?.line_count == 1 and this.isTableUnderline(off, &off, &col_count)) {
                    line.data = col_count;
                    line.tag = .table_underline;
                    break;
                }
            }

            // By default, we are normal text line.
            line.tag = .text;
            if (pivot_line.tag == .text and n_brothers + n_children == 0) {
                // lazy continuation
                n_parents = this.containers.len;
            }

            // Check for task mark.
if (this.flags.contains(.tasklists) and n_brothers + n_children > 0 and off < this.size and isCharAnyOf(this.containers.last().?.ch, "-+*.)")) {
                var tmp: u32 = off;

                // Skip up to three blanks; `isBlank` takes the byte, not the offset.
                while (tmp < this.size and tmp < off + 3 and isBlank(this.charAt(tmp))) {
                    tmp += 1;
                }

                if ((tmp + 2 < this.size and this.charAt(tmp) == '[' and this.isAnyOf(tmp + 1, "xX ") and this.charAt(tmp + 2) == ']') and
                    (tmp + 3 == this.size or isBlank(this.charAt(tmp + 3)) or this.isNewline(tmp + 3)))
                {
                    var task_container: *Container = if (n_children > 0) this.containers.last().? else &container;
                    task_container.is_task = true;
                    task_container.task_mark_off = tmp + 1;
                    off = tmp + 3;
                    while (off < this.size and isWhitespace(this.charAt(off))) {
                        off += 1;
                    }
                    if (off == this.size) break;
                    line.beg = off;
                }
            }

            break;
        }

        // Scan for end of the line. The first loop advances four bytes at a
        // time, but only while none of those four bytes is a newline, so it can
        // never step over the end of the line.
        while (off + 3 < this.size and
            !this.isNewline(off) and !this.isNewline(off + 1) and
            !this.isNewline(off + 2) and !this.isNewline(off + 3))
        {
            off += 4;
        }

        while (off < this.size and !this.isNewline(off)) {
            off += 1;
        }

        // Set end of line
        line.end = off;

        // But for ATX header, we should exclude the optional trailing mark.
        if (line.tag == .atx_header) {
            var tmp = line.end;
            while (tmp > line.beg and this.charAt(tmp - 1) == ' ') {
                tmp -= 1;
            }

            while (tmp > line.beg and this.charAt(tmp - 1) == '#') {
                tmp -= 1;
            }

            if (tmp == line.beg or this.charAt(tmp - 1) == ' ' or this.flags.contains(.permissive_atxheaders)) {
                line.end = tmp;
            }
        }

        // Trim trailing spaces.
        switch (line.tag) {
            .indented_code, .fenced_code => {},
            else => {
                while (line.end > line.beg and this.charAt(line.end - 1) == ' ') {
                    line.end -= 1;
                }
            },
        }

        // Eat also the new line
        if (off < this.size and this.charAt(off) == '\r') {
            off += 1;
        }

        if (off < this.size and this.charAt(off) == '\n') {
            off += 1;
        }

        end.* = off;

        // If we belong to a list after seeing a blank line, the list is loose.
if (prev_line_has_list_loosening_effect and line.tag != .blank and n_parents + n_brothers > 0) {
            // Both lookups need the element's address: indexing `ptr` yields a value.
            const c: *Container = &this.containers.ptr[n_parents + n_brothers - 1];
            if (c.ch != '>') {
                const block: *Block = &this.blocks.ptr[c.block_index];
                block.flags.insert(.loose_list);
            }
        }

        // Leave any containers we are not part of anymore.
        if (n_children == 0 and n_parents + n_brothers < this.containers.len) {
            try this.leaveChildContainers(n_parents + n_brothers);
        }

        // Enter any container we found a mark for
        if (n_brothers > 0) {
            // At most one brother mark is consumed per line.
            std.debug.assert(n_brothers == 1);

            // Close the previous list item ...
            try this.pushContainerBytes(
                Block.Tag.li,
                this.containers.ptr[n_parents].task_mark_off,
                if (this.containers.ptr[n_parents].is_task) this.charAt(this.containers.ptr[n_parents].task_mark_off) else 0,
                Block.Flags.container_closer,
            );
            // ... and open the new one.
            try this.pushContainerBytes(
                Block.Tag.li,
                container.task_mark_off,
                if (container.is_task) this.charAt(container.task_mark_off) else 0,
                Block.Flags.container_opener,
            );
            this.containers.ptr[n_parents].is_task = container.is_task;
            this.containers.ptr[n_parents].task_mark_off = container.task_mark_off;
        }

        if (n_children > 0) {
            try this.enterChildContainers(n_children);
        }
    }

    /// Feeds an analyzed `line` into the block structure: ends, starts or
    /// extends the current leaf block and updates the pivot line through
    /// `p_pivot_line`.
    fn processLine(this: *MDParser, p_pivot_line: **const Line.Analysis, line: *Line.Analysis) !void {
        const pivot_line = p_pivot_line.*;

        switch (line.tag) {
            .blank => {
                // Blank line ends current leaf block.
try this.endCurrentBlock();
                // The pivot is a pointer; point it at the shared blank analysis.
                p_pivot_line.* = &Line.Analysis.blank;
            },
            .hr, .atx_header => {
                try this.endCurrentBlock();

                // Add our single-line block
                try this.startNewBlock(line);
                try this.addLineIntoCurrentBlock(line);
                try this.endCurrentBlock();
                p_pivot_line.* = &Line.Analysis.blank;
            },
            .setext_underline => {
                // A Setext underline turns the current paragraph into a header.
                this.current_block.?.tag = .h;
                this.current_block.?.data = line.data;
                this.current_block.?.flags.insert(.setext_header);
                try this.addLineIntoCurrentBlock(line);
                try this.endCurrentBlock();
                if (this.current_block == null) {
                    p_pivot_line.* = &Line.Analysis.blank;
                } else {
                    // This happens if we have consumed all the body as link ref. defs.
                    // and downgraded the underline into start of a new paragraph block.
                    line.tag = .text;
                    p_pivot_line.* = line;
                }
            },
            // MD_LINE_TABLEUNDERLINE changes meaning of the current block.
            .table_underline => {
                var current_block = this.current_block.?;
                std.debug.assert(current_block.line_count == 1);
                current_block.tag = .table;
                current_block.data = line.data;
                std.debug.assert(pivot_line != &Line.Analysis.blank);
                // The pivot is handed out as const; cast it back to retag it as a table line.
                @intToPtr(*Line.Analysis, @ptrToInt(p_pivot_line.*)).tag = .table;
                try this.addLineIntoCurrentBlock(line);
            },
            else => {
                // The current block also ends if the line has different type.
                if (line.tag != pivot_line.tag) {
                    try this.endCurrentBlock();
                }

                // The current line may start a new block.
                if (this.current_block == null) {
                    try this.startNewBlock(line);
                    p_pivot_line.* = line;
                }

                // In all other cases the line is just a continuation of the current block.
try this.addLineIntoCurrentBlock(line);
            },
        }
    }

    /// Placeholder: link reference definitions are not consumed yet.
    fn consumeLinkReferenceDefinitions(this: *MDParser) !void {
        _ = this;
    }

    /// Appends the line described by `analysis` to the current block.
    /// Code and HTML blocks store their lines (with indent) in
    /// `verbatim_lines`; every other block stores them in `lines`.
    /// Requires `current_block` to be set.
    fn addLineIntoCurrentBlock(this: *MDParser, analysis: *const Line.Analysis) !void {
        var current_block = this.current_block.?;

        switch (current_block.tag) {
            .code, .html => {
                // Lines of a block must be contiguous at the end of the list.
                if (current_block.line_count > 0)
                    std.debug.assert(
                        this.verbatim_lines.len == current_block.line_count + current_block.line_offset,
                    );
                if (current_block.line_count == 0) {
                    current_block.line_offset = this.verbatim_lines.len;
                }

                try this.verbatim_lines.push(this.allocator, Line.Verbatim{
                    .indent = analysis.indent,
                    .line = .{
                        .beg = analysis.beg,
                        .end = analysis.end,
                    },
                });
            },
            else => {
                if (current_block.line_count > 0)
                    std.debug.assert(
                        this.lines.len == current_block.line_count + current_block.line_offset,
                    );
                if (current_block.line_count == 0) {
                    current_block.line_offset = this.lines.len;
                }

                // Propagate allocation failure, as the verbatim branch does.
                try this.lines.push(this.allocator, .{ .beg = analysis.beg, .end = analysis.end });
            },
        }

        current_block.line_count += 1;
    }

    /// Finishes the block under construction, if any.
    fn endCurrentBlock(this: *MDParser) !void {
        var block = this.current_block orelse return;

        // Check whether there is a reference definition. (We do this here instead
        // of in md_analyze_line() because reference definition can take multiple
        // lines.)
        // This applies to every paragraph, and to headers only when they are Setext headers.
        if (block.tag == .p or (block.tag == .h and block.flags.contains(.setext_header))) {
            var lines = block.lines(this.lines);
            // Compare the first byte of the line, not its offset.
            if (this.charAt(lines[0].beg) == '[') {
                try this.consumeLinkReferenceDefinitions();
                block = this.current_block orelse return;
            }
        }

        if (block.tag == .h and block.flags.contains(.setext_header)) {
            var n_lines = block.line_count;
            if (n_lines > 1) {
                // get rid of the underline
                if (this.lines.len == block.line_count + block.line_offset) {
                    this.lines.len -= 1;
                }
                block.line_count -= 1;
            } else {
                // Only the underline has left after eating the ref. defs.
                // Keep the line as beginning of a new ordinary paragraph.
                // The block therefore stays current; processLine relies on
                // `current_block` being non-null in this case.
                block.tag = .p;
                return;
            }
        }

        // Mark we are not building any block anymore.
this.current_block = null;
        this.current_block_index -|= 1;
    }

    /// Placeholder: the reference definition hash table is not built yet.
    fn buildRefDefHashTable(this: *MDParser) !void {
        _ = this;
    }

    /// Closes containers from the innermost outwards until only `keep` remain,
    /// emitting the matching closer blocks.
    fn leaveChildContainers(this: *MDParser, keep: u32) !void {
        while (this.containers.len > keep) {
            var c = this.containers.last().?;

            switch (c.ch) {
                // Ordered and bullet lists share the same closing sequence and
                // differ only in the list tag. Zig has no implicit fallthrough,
                // so both kinds are handled by one prong.
                ')', '.', '-', '+', '*' => {
                    const is_ordered_list = c.ch == ')' or c.ch == '.';
                    try this.pushContainerBytes(
                        Block.Tag.li,
                        c.task_mark_off,
                        if (c.is_task) this.charAt(c.task_mark_off) else 0,
                        Block.Flags.container_closer,
                    );
                    // As for the opener, the mark character travels in `data`.
                    try this.pushContainerBytes(
                        if (is_ordered_list) Block.Tag.ol else Block.Tag.ul,
                        0,
                        c.ch,
                        Block.Flags.container_closer,
                    );
                },
                '>' => {
                    try this.pushContainerBytes(
                        Block.Tag.quote,
                        0,
                        0,
                        Block.Flags.container_closer,
                    );
                },
                else => unreachable,
            }

            this.containers.len -= 1;
        }
    }

    /// Emits opener blocks for the last `keep` containers, which were just pushed.
    fn enterChildContainers(this: *MDParser, keep: u32) !void {
        var i: u32 = this.containers.len - keep;
        while (i < this.containers.len) : (i += 1) {
            // Address of the element, because `block_index` is written below.
            const c: *Container = &this.containers.ptr[i];
            const is_ordered_list = c.ch == ')' or c.ch == '.';

            switch (c.ch) {
                // One prong for both list kinds: Zig has no implicit fallthrough.
                ')', '.', '-', '+', '*' => {
                    // Remember offset in ctx.block_bytes so we can revisit the
                    // block if we detect it is a loose list.
try this.endCurrentBlock();
                    c.block_index = this.blocks.len;

                    try this.pushContainerBytes(
                        if (is_ordered_list) Block.Tag.ol else Block.Tag.ul,
                        c.start,
                        c.ch,
                        Block.Flags.container_opener,
                    );

                    try this.pushContainerBytes(
                        Block.Tag.li,
                        c.task_mark_off,
                        if (c.is_task) this.charAt(c.task_mark_off) else 0,
                        Block.Flags.container_opener,
                    );
                },
                '>' => {
                    try this.pushContainerBytes(
                        Block.Tag.quote,
                        0,
                        0,
                        Block.Flags.container_opener,
                    );
                },
                else => unreachable,
            }
        }
    }

    /// Appends `container` to the container stack.
    fn pushContainer(this: *MDParser, container: Container) !void {
        try this.containers.push(this.allocator, container);
    }

    /// Untagged union of the detail payloads a leaf block can carry.
    const LeafBlockDetail = union {
        none: void,
        h: Block.Header,
        code: Block.Code,
        table: Block.Table,
    };

    /// Builds the detail payload for a leaf block of the comptime-known `tag`
    /// and reports the block to `mdx.onEnterBlock`. Paragraphs inside a tight
    /// list are not reported.
    fn processLeafBlockWithType(this: *MDParser, comptime tag: Block.Tag, block: *Block) anyerror!void {
        const BlockDetailType = comptime switch (tag) {
            Block.Tag.h => Block.Header,
            Block.Tag.code => Block.Code,
            Block.Tag.table => Block.Table,
            else => void,
        };

        const is_in_tight_list = if (this.containers.len == 0)
            false
        else
            !this.containers.ptr[this.containers.len - 1].is_loose;

        const detail: BlockDetailType = switch (comptime tag) {
            Block.Tag.h => @truncate(Block.Header, block.data),
            Block.Tag.code => try this.setupFencedCodeDetail(block),
            Block.Tag.table => .{
                // Field name as declared by `Block.Table`.
                .column_count = block.data,
                .head_row_count = 1,
                // All lines except the header row and the underline row.
                .body_row_count = block.line_count -| 2,
            },
            else => void{},
        };

        if (!is_in_tight_list or comptime tag != .p) {
            try this.mdx.onEnterBlock(block.tag, BlockDetailType, detail);
        }

        defer {
            if (comptime tag == Block.Tag.code) {}
        }
    }

    /// Dispatches on the runtime tag of `block` to the comptime-specialized
    /// `processLeafBlockWithType`.
    fn processLeafBlock(this: *MDParser, block: *Block) anyerror!void {
        return switch (block.tag) {
            .doc => try this.processLeafBlockWithType(Block.Tag.doc, block),
            .quote => try this.processLeafBlockWithType(Block.Tag.quote, block),
            .ul => try this.processLeafBlockWithType(Block.Tag.ul, block),
            .ol => try this.processLeafBlockWithType(Block.Tag.ol, block),
            .li => try this.processLeafBlockWithType(Block.Tag.li, block),
            .hr => try this.processLeafBlockWithType(Block.Tag.hr,
block), .h => try this.processLeafBlockWithType(Block.Tag.h, block), .code => try this.processLeafBlockWithType(Block.Tag.code, block), .html => try this.processLeafBlockWithType(Block.Tag.html, block), .p => try this.processLeafBlockWithType(Block.Tag.p, block), .table => try this.processLeafBlockWithType(Block.Tag.table, block), .thead => try this.processLeafBlockWithType(Block.Tag.thead, block), .tbody => try this.processLeafBlockWithType(Block.Tag.tbody, block), .tr => try this.processLeafBlockWithType(Block.Tag.tr, block), .th => try this.processLeafBlockWithType(Block.Tag.th, block), .td => try this.processLeafBlockWithType(Block.Tag.td, block), }; } fn pushContainerBytes(this: *MDParser, block_type: Block.Tag, start: u32, data: u32, flag: Block.Flags) !void { try this.endCurrentBlock(); var block = Block{ .tag = block_type, .line_count = start, .data = data, }; block.flags.insert(flag); var prev_block: ?Block = null; if (this.current_block) |curr| { prev_block = curr.*; } try this.blocks.push(this.allocator, block); if (prev_block != null) { this.current_block = this.blocks.ptr[this.current_block_index]; } } fn processAllBlocks(this: *MDParser) !void { _ = this; // ctx->containers now is not needed for detection of lists and list items // so we reuse it for tracking what lists are loose or tight. We rely // on the fact the vector is large enough to hold the deepest nesting // level of lists. 
this.containers.len = 0; var blocks = this.blocks.slice(); for (blocks) |*block| { const detail: Block.Detail = switch (block.tag) { .ul => Block.Detail{ .ul = .{ .is_tight = !block.flags.contains(.loose_list), .mark = @truncate(u8, block.data), }, }, .ol => Block.Detail{ .ol = .{ .start = block.line_count, .is_tight = !block.flags.contains(.loose_list), .mark_delimiter = @truncate(u8, block.data), }, }, .li => Block.Detail{ .li = .{ .is_task = block.data != 0, .task_mark = @truncate(u8, block.data), .task_mark_offset = @intCast(u32, block.line_count), }, }, else => Block.Detail{ .none = .{} }, }; if (block.flags.contains(.container)) { if (block.flags.contains(.container_closer)) { switch (block.tag) { .li => try this.mdx.onLeaveBlock(block.tag, Block.LI, detail.li), .ul => try this.mdx.onLeaveBlock(block.tag, Block.UL, detail.ul), .ol => try this.mdx.onLeaveBlock(block.tag, Block.OL, detail.ol), else => try this.mdx.onLeaveBlock(block.tag, void, void{}), } this.containers.len -|= switch (block.tag) { .ul, .ol, .blockquote => 1, else => 0, }; } if (block.flags.contains(.container_opener)) { switch (block.tag) { .li => try this.mdx.onEnterBlock(block.tag, Block.LI, detail.li), .ul => try this.mdx.onEnterBlock(block.tag, Block.UL, detail.ul), .ol => try this.mdx.onEnterBlock(block.tag, Block.OL, detail.ol), else => try this.mdx.onEnterBlock(block.tag, void, void{}), } switch (block.tag) { .ul, .ol => { this.containers.ptr[this.containers.len].is_loose = block.flags.contains(.loose_list); this.containers.len += 1; }, .blockquote => { // This causes that any text in a block quote, even if // nested inside a tight list item, is wrapped with //

...

. */ this.containers.ptr[this.containers.len].is_loose = true; this.containers.len += 1; }, else => {}, } } } else { try this.processLeafBlock(block); } } } fn isContainerCompatible(pivot: *const Container, container: *const Container) bool { // Block quote has no "items" like lists. if (container.ch == '>') return false; if (container.ch != pivot.ch) return false; if (container.mark_indent > pivot.contents_indent) return false; return true; } fn isHRLine(this: *MDParser, beg: u32, end: *u32, hr_killer: *u32) bool { var off = beg + 1; var n: u32 = 1; while (off < this.size and (this.charAt(off) == this.charAt(beg) or this.charAt(off) == ' ' or this.charAt(off) == '\t')) { if (this.charAt(off) == this.charAt(beg)) n += 1; off += 1; } if (n < 3) { hr_killer.* = off; return false; } // Nothing else can be present on the line. */ if (off < this.size and !this.isNewline(off)) { hr_killer.* = off; return false; } end.* = off; return true; } fn isSetextUnderline(this: *MDParser, beg: u32, end: *u32, level: *u4) bool { var off = beg + 1; while (off < this.size and this.charAt(off) == this.charAt(beg)) off += 1; // Optionally, space(s) can follow. */ while (off < this.size and this.charAt(off) == ' ') off += 1; // But nothing more is allowed on the line. 
if (off < this.size and !this.isNewline(off)) return false; level.* = if (this.charAt(beg) == '=') 1 else 2; end.* = off; return true; } fn isATXHeaderLine(this: *MDParser, beg: u32, p_beg: *u32, end: *u32, level: *u4) bool { var n: i32 = undefined; var off: u32 = beg + 1; while (off < this.size and this.charAt(off) == '#' and off - beg < 7) { off += 1; } n = off - beg; if (n > 6) return false; level.* = @intCast(u4, n); if (!(this.flags.contains(.permissive_atxheaders)) and off < this.size and this.charAt(off) != ' ' and this.charAt(off) != '\t' and !this.isNewline(off)) return false; while (off < this.size and this.charAt(off) == ' ') { off += 1; } p_beg.* = off; end.* = off; return true; } fn isTableUnderline(this: *MDParser, beg: u32, end: *u32, column_column: *u32) bool { _ = this; _ = end; _ = column_column; var off = beg; var found_pipe = false; var col_count: u32 = 0; if (off < this.size and this.charAt(off) == '|') { found_pipe = true; off += 1; while (off < this.size and isWhitespace(this.charAt(off))) { off += 1; } } while (true) { var delimited = false; // Cell underline ("-----", ":----", "----:" or ":----:")if(off < this.size and this.charAt(off) == _T(':')) off += 1; if (off >= this.size or this.charAt(off) != '-') return false; while (off < this.size and this.charAt(off) == '-') off += 1; if (off < this.size and this.charAt(off) == ':') off += 1; col_count += 1; // Pipe delimiter (optional at the end of line). */ while (off < this.size and isWhitespace(this.charAt(off))) off += 1; if (off < this.size and this.charAt(off) == '|') { delimited = true; found_pipe = true; off += 1; while (off < this.size and isWhitespace(this.charAt(off))) off += 1; } // Success, if we reach end of line. 
if (off >= this.size or this.isNewline(off)) break; if (!delimited) return false; } if (!found_pipe) return false; column_column.* = col_count; end.* = off; return true; } fn isOpeningCodeFence(this: *MDParser, beg: u8, end: *u32) bool { var off = beg; const first = this.charAt(beg); while (off < this.size and this.charAt(off) == first) { off += 1; } // Fence must have at least three characters. if (off - beg < 3) return false; // Optionally, space(s) can follow while (off < this.size and this.charAt(off) == ' ') { off += 1; } // Optionally, an info string can follow. while (off < this.size and !this.isNewline(this.charAt(off))) { // Backtick-based fence must not contain '`' in the info string. if (first == '`' and this.charAt(off) == '`') return false; off += 1; } end.* = off; return true; } fn isClosingCodeFence(this: *MDParser, ch: u8, beg: u8, end: *u32) bool { var off = beg; defer { end.* = off; } while (off < this.size and this.charAt(off) == ch) { off += 1; } if (off - beg < this.code_fence_length) { return false; } // Optionally, space(s) can follow while (off < this.size and this.charAt(off) == ' ') { off += 1; } // But nothing more is allowed on the line. 
if (off < this.size and !this.isNewline(this.charAt(off))) return false; return true; } pub fn parse(this: *MDParser) anyerror!void { var pivot_line = &Line.Analysis.blank; var line_buf: [2]Line.Analysis = undefined; var line = &line_buf[0]; var offset: u32 = 0; try this.mdx.onEnterBlock(.doc, void, void{}); const len: u32 = this.size; while (offset < len) { if (line == pivot_line) { line = if (line == &line_buf[0]) &line_buf[1] else &line_buf[0]; } try this.analyzeLine(offset, &offset, pivot_line, line); try this.processLine(&pivot_line, line); } this.endCurrentBlock(); try this.buildRefDefHashTable(); this.leaveChildContainers(0); this.processAllBlocks(); try this.mdx.onLeaveBlock(.doc, void, void{}); } }; pub const MDX = struct { parser: JSParser, log: *logger.Log, allocator: std.mem.Allocator, stmts: std.ArrayListUnmanaged(js_ast.Stmt) = .{}, pub const Options = struct {}; pub fn onEnterBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void { _ = tag; _ = detail; _ = this; } pub fn onLeaveBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void { _ = tag; _ = detail; _ = this; } pub fn onEnterSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void { _ = tag; _ = detail; _ = this; } pub fn onLeaveSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void { _ = tag; _ = detail; _ = this; } pub fn onText(this: *MDX, tag: Text, text: []const u8) anyerror!void { _ = tag; _ = text; _ = this; } pub inline fn source(p: *const MDX) *const logger.Source { return &p.lexer.source; } pub fn e(_: *MDX, t: anytype, loc: logger.Loc) Expr { const Type = @TypeOf(t); if (@typeInfo(Type) == .Pointer) { return Expr.init(std.meta.Child(Type), t.*, loc); } else { return Expr.init(Type, t, loc); } } pub fn s(_: *MDX, t: anytype, loc: logger.Loc) Stmt { const Type = @TypeOf(t); if (@typeInfo(Type) == .Pointer) { return Stmt.init(std.meta.Child(Type), t.*, loc); } else { 
return Stmt.alloc(Type, t, loc); } } pub fn setup( this: *MDX, _options: ParserOptions, log: *logger.Log, source_: *const logger.Source, define: *Define, allocator: std.mem.Allocator, ) !void { try JSParser.init( allocator, log, source_, define, js_lexer.Lexer.initNoAutoStep(log, source_.*, allocator), _options, &this.parser, ); this.lexer = try Lexer.init(&this.parser.lexer); this.allocator = allocator; this.log = log; this.stmts = .{}; } pub fn parse(this: *MDX) !js_ast.Result { try this._parse(); return try runVisitPassAndFinish(JSParser, &this.parser, this.stmts.toOwnedSlice(this.allocator)); } fn run(this: *MDX) anyerror!logger.Loc { _ = this; return logger.Loc.Empty; } fn _parse(this: *MDX) anyerror!void { var root_children = std.ArrayListUnmanaged(Expr){}; var first_loc = try run(this, &root_children); first_loc.start = @maximum(first_loc.start, 0); const args_loc = first_loc; first_loc.start += 1; const body_loc = first_loc; // We need to simulate a function that was parsed _ = try this.parser.pushScopeForParsePass(.function_args, args_loc); _ = try this.parser.pushScopeForParsePass(.function_body, body_loc); const root = this.e(E.JSXElement{ .tag = this.e(E.JSXElement.Tag.map.get(E.JSXElement.Tag.main), body_loc), .children = ExprNodeList.fromList(root_children), }, body_loc); var root_stmts = try this.allocator.alloc(Stmt, 1); root_stmts[0] = this.s(S.Return{ .value = root }, body_loc); try this.stmts.append( this.allocator, this.s(S.ExportDefault{ .default_name = try this.parser.createDefaultName(args_loc), .value = .{ .expr = this.e(E.Arrow{ .body = G.FnBody{ .stmts = root_stmts, .loc = body_loc, }, .args = &[_]G.Arg{}, .prefer_expr = true, }, args_loc), }, }, args_loc), ); } };