diff options
author | 2023-05-14 06:13:39 -0700 | |
---|---|---|
committer | 2023-05-14 06:13:39 -0700 | |
commit | 893f70fee4a62b7729abc17257aee89a2dce0069 (patch) | |
tree | 9a1da782efdadc8ab9869293f6fd9cd64e5b4ba7 /src/mdx/mdx_parser.zig | |
parent | 7f25aa9e0864e95aad72ee85d475a03aee68bfb4 (diff) | |
download | bun-893f70fee4a62b7729abc17257aee89a2dce0069.tar.gz bun-893f70fee4a62b7729abc17257aee89a2dce0069.tar.zst bun-893f70fee4a62b7729abc17257aee89a2dce0069.zip |
Single-file standalone Bun executables (#2879)
* Add LIEF
* Compile LIEF
* Implement support for embedding files on macOS
* proof of concept
* Add zstd
* Implement runtime support
* Move some code around
* Update .gitmodules
* Upgrade zig
https://github.com/ziglang/zig/pull/15278
* leftover
* leftover
* delete dead code
* Fix extname
* Revert "Upgrade zig"
This reverts commit dd968f30bffb6c06e34302645a3a4468c957fb4e.
* Revert "leftover"
This reverts commit 7664de7686276cfba431103847d35b9270433dee.
* Revert "leftover"
This reverts commit 498005be06a8a1747d48824310e5a020b1f90d97.
* various fixes
* it works!
* leftover
* Make `zig build` a little faster
* give up on code signing support
* Support Linux & macOS
* Finish removing LIEF
* few more
* Add zstd to list of deps
* make it pretty
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
Diffstat (limited to 'src/mdx/mdx_parser.zig')
-rw-r--r-- | src/mdx/mdx_parser.zig | 1835 |
1 files changed, 0 insertions, 1835 deletions
diff --git a/src/mdx/mdx_parser.zig b/src/mdx/mdx_parser.zig deleted file mode 100644 index b3dcdb91f..000000000 --- a/src/mdx/mdx_parser.zig +++ /dev/null @@ -1,1835 +0,0 @@ -const std = @import("std"); -const logger = @import("root").bun.logger; -const mdx_lexer = @import("./mdx_lexer.zig"); -const Lexer = mdx_lexer.Lexer; -const importRecord = @import("../import_record.zig"); -const js_ast = bun.JSAst; -const JSParser = @import("../js_parser/js_parser.zig").MDXParser; -const ParseStatementOptions = @import("../js_parser/js_parser.zig").ParseStatementOptions; - -const options = @import("../options.zig"); - -const fs = @import("../fs.zig"); -const bun = @import("root").bun; -const string = bun.string; -const Output = bun.Output; -const Global = bun.Global; -const Environment = bun.Environment; -const strings = bun.strings; -const MutableString = bun.MutableString; -const stringZ = bun.stringZ; -const default_allocator = bun.default_allocator; -const C = bun.C; -const expect = std.testing.expect; -const ImportKind = importRecord.ImportKind; -const BindingNodeIndex = js_ast.BindingNodeIndex; -const Define = @import("../defines.zig").Define; -const js_lexer = bun.js_lexer; -const StmtNodeIndex = js_ast.StmtNodeIndex; -const ExprNodeIndex = js_ast.ExprNodeIndex; -const ExprNodeList = js_ast.ExprNodeList; -const StmtNodeList = js_ast.StmtNodeList; -const BindingNodeList = js_ast.BindingNodeList; -const ParserOptions = @import("../js_parser/js_parser.zig").Parser.Options; -const runVisitPassAndFinish = @import("../js_parser/js_parser.zig").Parser.runVisitPassAndFinish; -const Ref = @import("../ast/base.zig").Ref; -const assert = std.debug.assert; -const BabyList = js_ast.BabyList; - -const LocRef = js_ast.LocRef; -const S = js_ast.S; -const B = js_ast.B; -const G = js_ast.G; -const T = mdx_lexer.T; -const E = js_ast.E; -const Stmt = js_ast.Stmt; -const Expr = js_ast.Expr; -const Binding = js_ast.Binding; -const Symbol = js_ast.Symbol; -const Level = js_ast.Op.Level; -const Op = js_ast.Op; -const Scope = js_ast.Scope; -const Range = logger.Range; - -pub const Container = struct { - ch: u8 = 0, - is_loose: bool = false, - is_task: bool = false, - start: u32 = 0, - mark_indent: u32 = 0, - contents_indent: u32 = 0, - block_index: u32 = 0, - task_mark_off: u32 = 0, -}; - -pub const Block = struct { - tag: Tag = Tag.html, - flags: Block.Flags.Set = Block.Flags.Set{}, - data: u32 = 0, - /// Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block. - /// LI: Task mark offset in the input doc. - /// OL: Start item number. - /// - line_count: u32 = 0, - line_offset: u32 = 0, - detail: Block.Detail = Block.Detail{ .none = .{} }, - - pub inline fn lines(this: Block, lines_: BabyList(Line)) []Line { - return lines_.ptr[this.line_offset .. this.line_offset + this.line_count]; - } - - pub inline fn verbatimLines(this: Block, lines_: BabyList(Line.Verbatim)) []Line.Verbatim { - return lines_.ptr[this.line_offset .. this.line_offset + this.line_count]; - } - - pub const Data = u32; - - pub const Flags = enum(u3) { - container_opener = 0, - container_closer = 1, - loose_list = 2, - setext_header = 3, - - pub const Set = std.enums.EnumSet(Block.Flags); - }; - - pub inline fn isContainer(this: Block) bool { - return this.flags.contains(.container_opener) or this.flags.contains(.container_closer); - } - - pub const Tag = enum { - /// <body>...</body> - doc, - - /// <blockquote>...</blockquote> - quote, - - /// <ul>...</ul> - ///Detail: Structure ul_detail. - ul, - - /// <ol>...</ol> - ///Detail: Structure ol_detail. - ol, - - /// <li>...</li> - ///Detail: Structure li_detail. - li, - - /// <hr> - hr, - - /// <h1>...</h1> (for levels up to 6) - ///Detail: Structure h_detail. - h, - - /// <pre><code>...</code></pre> - ///Note the text lines within code blocks are terminated with '\n' - ///instead of explicit MD_TEXT_BR. - code, - - /// Raw HTML block. This itself does not correspond to any particular HTML - ///tag. The contents of it _is_ raw HTML source intended to be put - ///in verbatim form to the HTML output. - html, - - /// <p>...</p> - p, - - /// <table>...</table> and its contents. - ///Detail: Structure table_detail (for table), - /// structure td_detail (for th and td) - ///Note all of these are used only if extension MD_FLAG_TABLES is enabled. - table, - thead, - tbody, - tr, - th, - td, - }; - - pub const UL = struct { - tight: bool = false, - mark: u8 = '*', - }; - - pub const OL = struct { - start: u32 = 0, - tight: bool = false, - mark: u8 = '*', - }; - - pub const LI = struct { - /// Can be non-zero only with MD_FLAG_TASKLISTS - task: bool = false, - /// is_task, then one of 'x', 'X' or ' '. Undefined otherwise. - task_mark: u8 = 'x', - /// If is_task, then offset in the input of the char between '[' and ']'. - task_mark_off: u32 = 0, - }; - - pub const Header = u4; - - pub const Code = struct { - info: Attribute = .{}, - lang: Attribute = .{}, - /// character used for fenced code block; or zero for indented code block. * - fence: u8 = '`', - }; - - pub const Table = struct { - /// Count of columns in the table. - column_count: u32 = 0, - /// Count of rows in the table header (currently always 1) - head_row_count: u32 = 1, - /// Count of rows in the table body - body_row_count: u32 = 0, - }; - - pub const Detail = union { - none: void, - ul: UL, - ol: OL, - li: LI, - }; - - pub const TD = struct { - alignment: Align = Align.default, - }; -}; -pub const Span = struct { - pub const Tag = enum { - /// <em>...</em> - em, - - /// <strong>...</strong> - strong, - - /// <a href="xxx">...</a> - /// Detail: Structure a_detail. - a, - - /// <img src="xxx">...</a> - /// Detail: Structure img_detail. - /// Note: Image text can contain nested spans and even nested images. - /// If rendered into ALT attribute of HTML <IMG> tag, it's responsibility - /// of the parser to deal with it. - img, - - /// <code>...</code> - code, - - /// <del>...</del> - /// Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled. - del, - - /// For recognizing inline ($) and display ($$) equations - /// Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled. - latexmath, - latexmath_display, - - /// Wiki links - /// Note: Recognized only when MD_FLAG_WIKILINKS is enabled. - wikilink, - - /// <u>...</u> - /// Note: Recognized only when MD_FLAG_UNDERLINE is enabled. - u, - }; - - pub const Link = struct { - src: Attribute = .{}, - title: Attribute = .{}, - }; - - pub const Image = Link; - - pub const Wikilink = struct { - target: Attribute = .{}, - }; -}; - -pub const Text = enum { - /// Normal text. - normal, - /// NULL character. CommonMark requires replacing NULL character with - /// the replacement char U+FFFD, so this allows caller to do that easily. - nullchar, - /// Line breaks. - /// Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE - /// or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. - /// <br> (hard break) - br, - /// '\n' in source text where it is not semantically meaningful (soft break) - softbr, - /// Entity. - /// (a) Named entity, e.g. - /// (Note MD4C does not have a list of known entities. - /// Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is - /// treated as a named entity.) - /// (b) Numerical entity, e.g. Ӓ - /// (c) Hexadecimal entity, e.g. ካ - /// - /// As MD4C is mostly encoding agnostic, application gets the verbatim - /// entity text into the MD_PARSER::text_callback(). - entity, - /// Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`). - /// If it is inside MD_BLOCK_CODE, it includes spaces for indentation and - /// '\n' for new lines. br and softbr are not sent for this - /// kind of text. - code, - /// Text is a raw HTML. If it is contents of a raw HTML block (i.e. not - /// an inline raw HTML), then br and softbr are not used. - /// The text contains verbatim '\n' for the new lines. - html, - /// Text is inside an equation. This is processed the same way as inlined code - /// spans (`code`). - latexmath, -}; -pub const Align = enum(u3) { - default = 0, - left = 1, - center = 2, - right = 3, -}; - -/// String attribute. -/// -/// This wraps strings which are outside of a normal text flow and which are -/// propagated within various detailed structures, but which still may contain -/// string portions of different types like e.g. entities. -/// -/// So, for example, lets consider this image: -/// -///  -/// -/// The image alt text is propagated as a normal text via the MD_PARSER::text() -/// callback. However, the image title ('foo " bar') is propagated as -/// MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title. -/// -/// Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following: -/// -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0) -/// -- [1]: """ (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4) -/// -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10) -/// -- [3]: (n/a) (n/a ; substr_offsets[3] == 14) -/// -/// Note that these invariants are always guaranteed: -/// -- substr_offsets[0] == 0 -/// -- substr_offsets[LAST+1] == size -/// -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR -/// substrings can appear. This could change only of the specification -/// changes. -/// -pub const Attribute = struct { - text: []const u8 = "", - substring: Substring.List = .{}, -}; -pub const Substring = struct { - offset: u32, - tag: Text, - - pub const List = std.MultiArrayList(Substring); - pub const ListPool = ObjectPool(List); -}; - -pub const Mark = struct { - position: Ref = Ref.None, - prev: u32 = std.math.maxInt(u32), - next: u32 = std.math.maxInt(u32), - ch: u8 = 0, - flags: u16 = 0, - - /// Maybe closer. - pub const potential_closer = 0x02; - /// Maybe opener. - pub const potential_opener = 0x01; - /// Definitely opener. - pub const opener = 0x04; - /// Definitely closer. - pub const closer = 0x08; - /// Resolved in any definite way. - pub const resolved = 0x10; - - /// Helper for the "rule of 3". */ - pub const emph_intraword = 0x20; - pub const emph_mod3_0 = 0x40; - pub const emph_mod3_1 = 0x80; - pub const emph_mod3_2 = (0x40 | 0x80); - pub const emph_mod3_mask = (0x40 | 0x80); - /// Distinguisher for '<', '>'. */ - pub const autolink = 0x20; - /// For permissive autolinks. */ - pub const validpermissiveautolink = 0x20; - /// For '[' to rule out invalid link labels early */ - pub const hasnestedbrackets = 0x20; - - /// During analyzes of inline marks, we need to manage some "mark chains", - /// of (yet unresolved) openers. This structure holds start/end of the chain. - /// The chain internals are then realized through MD_MARK::prev and ::next. - pub const Chain = struct { - head: u32 = std.math.maxInt(u32), - tail: u32 = std.math.maxInt(u32), - - pub const List = struct { - data: [13]Chain = [13]Chain{ .{}, .{}, .{}, .{}, .{}, .{}, .{}, .{}, .{}, .{}, .{}, .{} }, - pub inline fn ptr_chain(this: *List) *Chain { - return &this.data[0]; - } - pub inline fn tablecellboundaries(this: *List) *Chain { - return &this.data[1]; - } - pub inline fn asterisk_openers_extraword_mod3_0(this: *List) *Chain { - return &this.data[2]; - } - pub inline fn asterisk_openers_extraword_mod3_1(this: *List) *Chain { - return &this.data[3]; - } - pub inline fn asterisk_openers_extraword_mod3_2(this: *List) *Chain { - return &this.data[4]; - } - pub inline fn asterisk_openers_intraword_mod3_0(this: *List) *Chain { - return &this.data[5]; - } - pub inline fn asterisk_openers_intraword_mod3_1(this: *List) *Chain { - return &this.data[6]; - } - pub inline fn asterisk_openers_intraword_mod3_2(this: *List) *Chain { - return &this.data[7]; - } - pub inline fn underscore_openers(this: *List) *Chain { - return &this.data[8]; - } - pub inline fn tilde_openers_1(this: *List) *Chain { - return &this.data[9]; - } - pub inline fn tilde_openers_2(this: *List) *Chain { - return &this.data[10]; - } - pub inline fn bracket_openers(this: *List) *Chain { - return &this.data[11]; - } - pub inline fn dollar_openers(this: *List) *Chain { - return &this.data[12]; - } - }; - }; -}; - -pub const Line = struct { - beg: u32 = 0, - end: u32 = 0, - - pub const Tag = enum(u32) { - blank, - hr, - atx_header, - setext_header, - setext_underline, - indented_code, - fenced_code, - html, - text, - table, - table_underline, - }; - pub const Analysis = packed struct { - tag: Tag = Tag.blank, - beg: u32 = 0, - end: u32 = 0, - indent: u32 = 0, - data: u32 = 0, - - pub const blank = Analysis{}; - pub fn eql(a: Analysis, b: Analysis) bool { - return strings.eqlLong(std.mem.asBytes(&a), std.mem.asBytes(&b), false); - } - }; - - pub const Verbatim = struct { - line: Line = Line{}, - indent: u32 = 0, - }; -}; - -pub const MDParser = struct { - marks: BabyList(Mark) = .{}, - chain: Mark.Chain.List = .{}, - source: logger.Source, - flags: Flags.Set = Flags.commonmark, - allocator: std.mem.Allocator, - mdx: *MDX, - mark_char_map: [255]u1 = undefined, - doc_ends_with_newline: bool = false, - size: u32 = 0, - - lines: BabyList(Line) = .{}, - verbatim_lines: BabyList(Line.Verbatim) = .{}, - - containers: BabyList(Container) = .{}, - blocks: BabyList(Block) = .{}, - current_block: ?*Block = null, - current_block_index: u32 = 0, - - code_fence_length: u32 = 0, - code_indent_offset: u32 = std.math.maxInt(u32), - last_line_has_list_loosening_effect: bool = false, - last_list_item_starts_with_two_blank_lines: bool = false, - - pub const Flags = enum { - /// In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' - collapse_whitespace, - /// Do not require space in ATX headers ( ###header ) - permissive_atxheaders, - /// Recognize URLs as autolinks even without '<', '>' - permissive_url_autolinks, - /// Recognize e-mails as autolinks even without '<', '>' and 'mailto:' - permissive_email_autolinks, - /// Disable indented code blocks. (Only fenced code works.) - noindented_codeblocks, - /// Disable raw HTML blocks. - no_html_blocks, - /// Disable raw HTML (inline). - no_html_spans, - /// Enable tables extension. - tables, - /// Enable strikethrough extension. - strikethrough, - /// Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') - permissive_www_autolinks, - /// Enable task list extension. - tasklists, - /// Enable $ and $$ containing LaTeX equations. - latex_mathspans, - /// Enable wiki links extension. - wikilinks, - /// Enable underline extension (and disables '_' for normal emphasis). - underline, - - pub const Set = std.enums.EnumSet(Flags); - pub const permissive_autolinks = Set.init(.{ .permissive_email_autolinks = true, .permissive_url_autolinks = true }); - pub const no_email = Set.init(.{ .no_html_blocks = true, .no_html_spans = true }); - pub const github = Set.init(.{ .tables = true, .permissive_autolinks = true, .strikethrough = true, .tasklists = true }); - pub const commonmark: i32 = Set{}; - }; - - fn buildCharMap(this: *MDParser) void { - @memset(&this.mark_char_map, 0, this.mark_char_map.len); - - this.mark_char_map['\\'] = 1; - this.mark_char_map['*'] = 1; - this.mark_char_map['_'] = 1; - this.mark_char_map['`'] = 1; - this.mark_char_map['&'] = 1; - this.mark_char_map[';'] = 1; - this.mark_char_map['<'] = 1; - this.mark_char_map['>'] = 1; - this.mark_char_map['['] = 1; - this.mark_char_map['!'] = 1; - this.mark_char_map[']'] = 1; - this.mark_char_map[0] = 1; - - // whitespace - this.mark_char_map[' '] = 1; - this.mark_char_map['\t'] = 1; - this.mark_char_map['\r'] = 1; - this.mark_char_map['\n'] = 1; - - // form feed - this.mark_char_map[0xC] = 1; - // vertical tab - this.mark_char_map[0xB] = 1; - - if (this.flags.contains(.strikethrough)) { - this.mark_char_map['~'] = 1; - } - - if (this.flags.contains(.latex_mathspans)) { - this.mark_char_map['$'] = 1; - } - - if (this.flags.contains(.permissive_email_autolinks)) { - this.mark_char_map['@'] = 1; - } - - if (this.flags.contains(.permissive_url_autolinks)) { - this.mark_char_map[':'] = 1; - } - - if (this.flags.contains(.permissive_www_autolinks)) { - this.mark_char_map['.'] = 1; - } - - if (this.flags.contains(.tables)) { - this.mark_char_map['.'] = 1; - } - } - pub fn init(allocator: std.mem.Allocator, source: logger.Source, flags: Flags.Set, mdx: *MDX) MDParser { - var parser = MDParser{ - .allocator = allocator, - .source = source, - .flags = flags, - .mdx = mdx, - .size = @truncate(u32, source.contents.len), - }; - parser.buildCharMap(); - parser.doc_ends_with_newline = source.contents.len.len > 0 and source.contents[source.contents.len - 1] == '\n'; - return parser; - } - - fn startNewBlock(this: *MDParser, line: *const Line.Analysis) !void { - try this.blocks.push( - this.allocator, - Block{ - .tag = switch (line.tag) { - .hr => Block.Tag.hr, - .atx_header, .setext_header => Block.Tag.h, - .fenced_code, .indented_code => Block.Tag.code, - .text => Block.Tag.p, - .html => Block.Tag.html, - else => unreachable, - }, - .data = line.data, - .line_count = 0, - .line_offset = switch (line.tag) { - .indented_code, .html, .fenced_code => this.verbatim_lines.len, - else => this.lines.len, - }, - }, - ); - } - - inline fn charAt(this: *const MDParser, index: u32) u8 { - return this.source.contents[index]; - } - - inline fn isNewline(this: *const MDParser, index: u32) bool { - return switch (this.charAt(index)) { - '\n', '\r' => true, - else => false, - }; - } - - inline fn isAnyOf2(this: *const MDParser, index: u32, comptime first: u8, comptime second: u8) bool { - return isAnyOf2_(this.charAt(index), first, second); - } - - inline fn isAnyOf2_(char: u8, comptime first: u8, comptime second: u8) bool { - return switch (char) { - first, second => true, - else => false, - }; - } - - inline fn isAnyOf(this: *const MDParser, index: u32, comptime values: []const u8) bool { - return isCharAnyOf(this.charAt(index), values); - } - - inline fn isCharAnyOf(char: u8, comptime values: []const u8) bool { - inline for (values) |val| { - if (val == char) return true; - } - return false; - } - - inline fn isBlank(char: u8) bool { - return isCharAnyOf(char, &[_]u8{ ' ', '\t' }); - } - - inline fn isWhitespace(char: u8) bool { - return isCharAnyOf(char, &[_]u8{ ' ', '\t', 0xC, 0xB }); - } - - pub fn getIndent(this: *MDParser, total_indent: u32, beg: u32, end: *u32) u32 { - var off = beg; - var indent = total_indent; - while (off < this.size and isBlank(this.charAt(off))) { - if (this.charAt(off) == '\t') { - indent = (indent + 4) & ~3; - } else { - indent += 1; - } - off += 1; - } - end.* = off; - return indent - total_indent; - } - - pub fn isContainerMark(this: *MDParser, indent: u32, beg: u32, end: *u32, container: *Container) bool { - var off = beg; - var max_end: u32 = undefined; - - if (off >= this.size or indent >= this.code_indent_offset) - return false; - - if (this.charAt(off) == '>') { - off += 1; - container.ch = '>'; - container.is_loose = false; - container.is_task = false; - container.mark_indent = indent; - container.contents_indent = indent + 1; - end.* = off; - return true; - } - - // Check for list item bullet mark. - if (this.isAnyOf(off, "-+*") and (off + 1 >= this.size or isBlank(this.charAt(off + 1)) or this.isNewline(off + 1))) { - container.ch = this.charAt(off); - container.is_loose = false; - container.is_task = false; - container.mark_indent = indent; - container.contents_indent = indent + 1; - end.* = off + 1; - return true; - } - - // Check for ordered list item marks - max_end = @min(off + 9, this.size); - container.start = 0; - while (off < max_end and std.ascii.isDigit(this.charAt(off))) { - container.start = container.start * 10 + (this.charAt(off) - '0'); - off += 1; - } - - if (off > beg and - off < this.size and - (this.isAnyOf2(off, '.', ')')) and - (off + 1 >= this.size or - this.isBlank(this.charAt(off + 1) or - this.isNewline(off + 1)))) - { - container.ch = this.charAt(off); - container.is_loose = false; - container.is_task = false; - container.mark_indent = indent; - container.contents_indent = indent + off - beg + 1; - end.* = off + 1; - return true; - } - - return false; - } - - fn analyzeLine(this: *MDParser, beg: u32, end: *u32, pivot_line: *const Line.Analysis, line: *Line.Analysis) !void { - _ = this; - _ = beg; - _ = end; - _ = pivot_line; - _ = line; - var off = beg; - var hr_killer: u32 = 0; - var prev_line_has_list_loosening_effect = this.last_line_has_list_loosening_effect; - var container = Container{}; - _ = hr_killer; - _ = prev_line_has_list_loosening_effect; - _ = container; - var total_indent: u32 = 0; - var n_parents: u32 = 0; - var n_brothers: u32 = 0; - var n_children: u32 = 0; - - // Given the indentation and block quote marks '>', determine how many of - // the current containers are our parents. - while (n_parents < this.containers.len) { - var c: *Container = this.containers.ptr + n_parents; - - if (c.ch == '>' and line.indent < this.code_indent_offset and off < this.size and this.charAt(off) == '>') { - off += 1; - total_indent += 1; - line.indent = this.getIndent(total_indent, off, &off); - total_indent += line.indent; - - // The optional 1st space after '>' is part of the block quote mark. - line.indent -|= line.indent; - line.beg = off; - } else if (c.ch != '>' and line.indent >= c.contents_indent) { - line.indent -|= c.contents_indent; - } else { - break; - } - - n_parents += 1; - } - - if (off >= this.size or this.isNewline(off)) { - // Blank line does not need any real indentation to be nested inside a list - if (n_brothers + n_children == 0) { - while (n_parents < this.containers.len and this.containers.ptr[n_parents].ch == '>') { - n_parents += 1; - } - } - } - - while (true) { - switch (pivot_line.tag) { - .fencedcode => { - // Check whether we are fenced code continuation. - line.beg = off; - - // We are another MD_LINE_FENCEDCODE unless we are closing fence - // which we transform into MD_LINE_BLANK. - if (line.indent < this.code_indent_offset) { - if (this.isClosingCodeFence(this.charAt(pivot_line.beg), off, &off)) { - line.tag = .blank; - this.last_line_has_list_loosening_effect = false; - break; - } - } - - // Change indentation accordingly to the initial code fence. - if (n_parents == this.containers.len) { - line.indent -|= pivot_line.indent; - line.tag = .fenced_code; - break; - } - }, - - .indentedcode => {}, - .text => {}, - - .html => {}, - else => {}, - } - - // Check for blank line. - if (off >= this.size or this.isNewline(off)) { - if (pivot_line.tag == .indented_code and n_parents == this.containers.len) { - line.tag = .indented_code; - line.indent -|= this.code_indent_offset; - this.last_line_has_list_loosening_effect = false; - } else { - line.tag = .blank; - this.last_line_has_list_loosening_effect = n_parents > 0 and - n_brothers + n_children == 0 and - this.containers.ptr[n_parents - 1].ch != '>'; - - // See https://github.com/mity/md4c/issues/6 - // - // This ugly checking tests we are in (yet empty) list item but - // not its very first line (i.e. not the line with the list - // item mark). - // - // If we are such a blank line, then any following non-blank - // line which would be part of the list item actually has to - // end the list because according to the specification, "a list - // item can begin with at most one blank line." - // - if (n_parents > 0 and this.containers.ptr[n_parents - 1].ch != '>' and n_brothers + n_children == 0 and this.current_block == null and this.blocks.len > 0) { - var top_block = this.blocks.last().?; - if (top_block.tag == .li) { - this.last_list_item_starts_with_two_blank_lines = true; - } - } - } - break; - } else { - // This is the 2nd half of the hack. If the flag is set (i.e. there - // was a 2nd blank line at the beginning of the list item) and if - // we would otherwise still belong to the list item, we enforce - // the end of the list. - this.last_line_has_list_loosening_effect = false; - if (this.last_list_item_starts_with_two_blank_lines) { - if (n_parents > 0 and - this.containers.ptr[n_parents - 1].ch != '>' and - n_brothers + n_children == 0 and - this.current_block == null and this.blocks.len > 1) - { - var top = this.blocks.last().?; - if (top.tag == .li) { - n_parents -|= 1; - } - } - this.last_line_has_list_loosening_effect = true; - } - } - - // Check whether we are Setext underline. - if (line.indent < this.code_indent_offset and - pivot_line.tag == .text and - off < this.size and - this.isAnyOf2(off, '=', '-') and - n_parents == this.containers.len) - { - var level: u4 = 0; - if (this.isSetextUnderline(off, &off, &level)) { - line.tag = .setext_underline; - line.data = level; - break; - } - } - - // Check for a thematic break line - if (line.indent < this.code_indent_offset and off < this.size and off >= hr_killer and this.isAnyOf(off, "-_*")) { - if (this.isHRLine(off, &off, &hr_killer)) { - line.tag = .hr; - break; - } - } - - // Check for "brother" container. I.e. whether we are another list item - //in already started list. - if (n_parents < this.containers.len and n_brothers + n_children == 0) { - var tmp: u32 = undefined; - - if (this.isContainerMark(line.indent, off, &tmp, &container) and - isContainerCompatible(&this.containers.ptr[n_parents], &container)) - { - pivot_line.* = Line.Analysis.blank; - off = tmp; - - total_indent += container.contents_indent - container.mark_indent; - line.indent = this.getIndent(total_indent, off, &off); - total_indent += line.indent; - line.beg = off; - - // Some of the following whitespace actually still belongs to the mark. - if (off >= this.size or this.isNewline(off)) { - container.contents_indent += 1; - } else if (line.indent <= this.code_indent_offset) { - container.contents_indent += line.indent; - line.indent = 0; - } else { - container.contents_indent += 1; - line.indent -= 1; - } - - this.containers.ptr[n_parents].mark_indent = container.mark_indent; - this.containers.ptr[n_parents].contents_indent = container.contents_indent; - n_brothers += 1; - continue; - } - } - - // Check for indented code - // Note: indented code block cannot interrupt a paragrpah - if (line.indent >= this.code_indent_offset and - (pivot_line.tag == .blank or - pivot_line.tag == .indented_code)) - { - line.tag = .indented_code; - std.debug.assert(line.indent >= this.code_indent_offset); - line.indent -|= this.code_indent_offset; - line.data = 0; - break; - } - - // Check for start of a new container block - if (line.indent < this.code_indent_offset and - this.isContainerMark(line.indent, off, &off, &container)) - { - if (pivot_line.tag == .text and - n_parents == this.n_containers and - (off >= this.size or this.isNewline(off)) and - container.ch != '>') - { - // Noop. List mark followed by a blank line cannot interrupt a paragraph. - } else if (pivot_line.tag == .text and - n_parents == this.containers.len and - isAnyOf2_(container.ch, '.', ')')) - { - // Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. - } else { - total_indent += container.contents_indent - container.mark_indent; - line.indent = this.getIndent(total_indent, off, &off); - total_indent += line.indent; - - line.beg = off; - line.data = container.ch; - - // Some of the following whitespace actually still belongs to the mark. - if (off >= this.size or this.isNewline(off)) { - container.contents_indent += 1; - } else if (line.indent <= this.code_indent_offset) { - container.contents_indent += line.indent; - line.indent = 0; - } else { - container.contents_indent += 1; - line.indent -= 1; - } - - if (n_brothers + n_children == 0) { - pivot_line.* = Line.Analysis.blank; - } - - if (n_children == 0) { - try this.leaveChildContainers(n_parents + n_brothers); - } - - n_children += 1; - try this.pushContainer(container); - continue; - } - } - - // heck whether we are table continuation. - if (pivot_line.tag == .table and n_parents == this.n_containers) { - line.tag = .table; - break; - } - - // heck for ATX header. - if (line.indent < this.code_indent_offset and off < this.size and this.isAnyOf(off, '#')) { - var level: u4 = 0; - if (this.isATXHeaderLine(off, &line.beg, &off, &level)) { - line.tag = .atx_header; - line.data = level; - break; - } - } - - // Check whether we are starting code fence. - if (off < this.size and this.isAnyOf2(off, '`', '~')) { - if (this.isOpeningCodeFence(off, &off)) { - line.tag = .fenced_code; - line.data = 1; - break; - } - } - - // Check for start of raw HTML block. - if (off < this.size and !this.flags.contains(.no_html_blocks) and this.charAt(off) == '<') {} - - // Check for table underline. - if (this.flags.contains(.tables) and pivot_line.tag == .text and off < this.size and this.isAnyOf(off, "|-:") and n_parents == this.containers.len) { - var col_count: u32 = undefined; - - if (this.current_block != null and this.current_block.?.line_count == 1 and this.isTableUnderline(off, &off, &col_count)) { - line.data = col_count; - line.tag = .table_underline; - break; - } - } - - // By default, we are normal text line. - line.tag = .text; - if (pivot_line.tag == .text and n_brothers + n_children == 0) { - // lazy continuation - n_parents = this.containers.len; - } - - // Check for task mark. - if (this.flags.contains(.tasklists) and - n_brothers + n_children > 0 and - off < this.size and - isCharAnyOf(this.containers.last().?.ch, "-+*.)")) - { - var tmp: u32 = off; - - while (tmp < this.size and tmp < off + 3 and isBlank(tmp)) { - tmp += 1; - } - - if ((tmp + 2 < this.size and - this.charAt(tmp) == '[' and - this.isAnyOf(tmp + 1, "xX ") and - this.charAt(tmp + 2) == ']') and - (tmp + 3 == this.size or - isBlank(this.charAt(tmp + 3)) or - this.isNewline(tmp + 3))) - { - var task_container: *Container = if (n_children > 0) this.containers.last().? else &container; - task_container.is_task = true; - task_container.task_mark_off = tmp + 1; - off = tmp + 3; - while (off < this.size and isWhitespace(this.charAt(off))) { - off += 1; - } - if (off == this.size) break; - line.beg = off; - } - } - - break; - } - - // Scan for end of the line. - while (!(strings.hasPrefixComptime(this.source.contents.ptr[off..], "\n\n\n\n") or - strings.hasPrefixComptime(this.source.contents.ptr[off..], "\r\n\r\n"))) - { - off += 4; - } - - while (off < this.size and !this.isNewline(off)) { - off += 1; - } - - // Set end of line - line.end = off; - - // ut for ATX header, we should exclude the optional trailing mark. - if (line.type == .atx_header) { - var tmp = line.end; - while (tmp > line.beg and this.charAt(tmp - 1) == ' ') { - tmp -= 1; - } - - while (tmp > line.beg and this.charAt(tmp - 1) == '#') { - tmp -= 1; - } - - if (tmp == line.beg or this.charAt(tmp - 1) == ' ' or this.flags.contains(.permissive_atxheaders)) { - line.end = tmp; - } - } - - // Trim trailing spaces. - switch (line.tag) { - .indented_code, .fenced_code => {}, - else => { - while (line.end > line.beg and this.charAt(line.end - 1) == ' ') { - line.end -= 1; - } - }, - } - - // Eat also the new line - if (off < this.size and this.charAt(off) == '\r') { - off += 1; - } - - if (off < this.size and this.charAt(off) == '\n') { - off += 1; - } - - end.* = off; - - // If we belong to a list after seeing a blank line, the list is loose. - if (prev_line_has_list_loosening_effect and line.tag != .blank and n_parents + n_brothers > 0) { - var c: *Container = this.containers.ptr[n_parents + n_brothers - 1]; - if (c.ch != '>') { - var block: *Block = this.blocks.ptr[c.block_index]; - block.flags.insert(.loose_list); - } - } - - // Leave any containers we are not part of anymore. - if (n_children == 0 and n_parents + n_brothers < this.containers.len) { - try this.leaveChildContainers(n_parents + n_brothers); - } - - // Enter any container we found a mark for - if (n_brothers > 0) { - std.debug.assert(n_brothers == 0); - try this.pushContainerBytes( - Block.Tag.li, - this.containers.ptr[n_parents].task_mark_off, - if (this.containers.ptr[n_parents].is_task) this.charAt(this.containers.ptr[n_parents].task_mark_off) else 0, - Block.Flags.container_closer, - ); - try this.pushContainerBytes( - Block.Tag.li, - container.task_mark_off, - if (container.is_task) this.charAt(container.task_mark_off) else 0, - Block.Flags.container_opener, - ); - this.containers.ptr[n_parents].is_task = container.is_task; - this.containers.ptr[n_parents].task_mark_off = container.task_mark_off; - } - - if (n_children > 0) { - try this.enterChildContainers(n_children); - } - } - fn processLine(this: *MDParser, p_pivot_line: **const Line.Analysis, line: *Line.Analysis) !void { - var pivot_line = p_pivot_line.*; - - switch (line.tag) { - .blank => { - // Blank line ends current leaf block. - try this.endCurrentBlock(); - p_pivot_line.* = Line.Analysis.blank; - }, - .hr, .atx_header => { - try this.endCurrentBlock(); - - // Add our single-line block - try this.startNewBlock(line); - try this.addLineIntoCurrentBlock(line); - try this.endCurrentBlock(); - p_pivot_line.* = &Line.Analysis.blank; - }, - .setext_underline => { - this.current_block.?.tag = .table; - this.current_block.?.data = line.data; - this.current_block.?.flags.insert(.setext_header); - try this.addLineIntoCurrentBlock(line); - try this.endCurrentBlock(); - if (this.current_block == null) { - p_pivot_line.* = &Line.Analysis.blank; - } else { - // This happens if we have consumed all the body as link ref. defs. - //and downgraded the underline into start of a new paragraph block. - line.tag = .text; - p_pivot_line.* = line; - } - }, - // MD_LINE_TABLEUNDERLINE changes meaning of the current block. - .table_underline => { - var current_block = this.current_block.?; - std.debug.assert(current_block.line_count == 1); - current_block.tag = .table; - current_block.data = line.data; - std.debug.assert(pivot_line != &Line.Analysis.blank); - @intToPtr(*Line.Analysis, @ptrToInt(p_pivot_line.*)).tag = .table; - try this.addLineIntoCurrentBlock(line); - }, - else => { - // The current block also ends if the line has different type. - if (line.tag != pivot_line.tag) { - try this.endCurrentBlock(); - } - - // The current line may start a new block. - if (this.current_block == null) { - try this.startNewBlock(line); - p_pivot_line.* = line; - } - - // In all other cases the line is just a continuation of the current block. - try this.addLineIntoCurrentBlock(line); - }, - } - } - fn consumeLinkReferenceDefinitions(this: *MDParser) !void { - _ = this; - } - fn addLineIntoCurrentBlock(this: *MDParser, analysis: *const Line.Analysis) !void { - var current_block = this.current_block.?; - - switch (current_block.tag) { - .code, .html => { - if (current_block.line_count > 0) - std.debug.assert( - this.verbatim_lines.len == current_block.line_count + current_block.line_offset, - ); - if (current_block.line_count == 0) { - current_block.line_offset = this.verbatim_lines.len; - } - - try this.verbatim_lines.push(this.allocator, Line.Verbatim{ - .indent = analysis.indent, - .line = .{ - .beg = analysis.beg, - .end = analysis.end, - }, - }); - }, - else => { - if (current_block.line_count > 0) - std.debug.assert( - this.lines.len == current_block.line_count + current_block.line_offset, - ); - if (current_block.line_count == 0) { - current_block.line_offset = this.lines.len; - } - this.lines.push(this.allocator, .{ .beg = analysis.beg, .end = analysis.end }); - }, - } - - current_block.line_count += 1; - } - fn endCurrentBlock(this: *MDParser) !void { - _ = this; - - var block = this.current_block orelse return; - // Check whether there is a reference definition. (We do this here instead - // of in md_analyze_line() because reference definition can take multiple - // lines.) */ - if ((block.tag == .p or block.tag == .h) and block.flags.contains(.setext_header)) { - var lines = block.lines(this.lines); - if (lines[0].beg == '[') { - try this.consumeLinkReferenceDefinitions(); - block = this.current_block orelse return; - } - } - - if (block.tag == .h and block.flags.contains(.setext_header)) { - var n_lines = block.line_count; - if (n_lines > 1) { - // get rid of the underline - if (this.lines.len == block.line_count + block.line_offset) { - this.lines.len -= 1; - } - block.line_count -= 1; - } else { - // Only the underline has left after eating the ref. defs. - // Keep the line as beginning of a new ordinary paragraph. */ - block.tag = .p; - } - } - - // Mark we are not building any block anymore. - this.current_block = null; - this.current_block_index -|= 1; - } - fn buildRefDefHashTable(this: *MDParser) !void { - _ = this; - } - fn leaveChildContainers(this: *MDParser, keep: u32) !void { - _ = this; - while (this.containers.len > keep) { - var c = this.containers.last().?; - var is_ordered_list = false; - switch (c.ch) { - ')', '.' => { - is_ordered_list = true; - }, - '-', '+', '*' => { - try this.pushContainerBytes( - Block.Tag.li, - c.task_mark_off, - if (c.is_task) this.charAt(c.task_mark_off) else 0, - Block.Flags.container_closer, - ); - try this.pushContainerBytes( - if (is_ordered_list) Block.Tag.ol else Block.Tag.ul, - c.ch, - if (c.is_task) this.charAt(c.task_mark_off) else 0, - Block.Flags.container_closer, - ); - }, - '>' => { - try this.pushContainerBytes( - Block.Tag.quote, - 0, - 0, - Block.Flags.container_closer, - ); - }, - else => unreachable, - } - - this.containers.len -= 1; - } - } - fn enterChildContainers(this: *MDParser, keep: u32) !void { - _ = this; - var i: u32 = this.containers.len - keep; - while (i < this.containers.len) : (i += 1) { - var c: *Container = this.containers.ptr[i]; - var is_ordered_list = false; - - switch (c.ch) { - ')', '.' => { - is_ordered_list = true; - }, - '-', '+', '*' => { - // Remember offset in ctx.block_bytes so we can revisit the - // block if we detect it is a loose list. - try this.endCurrentBlock(); - c.block_index = this.blocks.len; - - try this.pushContainerBytes( - if (is_ordered_list) Block.Tag.ol else Block.Tag.ul, - c.start, - c.ch, - Block.Flags.container_opener, - ); - try this.pushContainerBytes( - Block.Tag.li, - c.task_mark_off, - if (c.is_task) this.charAt(c.task_mark_off) else 0, - Block.Flags.container_opener, - ); - }, - '>' => { - try this.pushContainerBytes( - Block.Tag.quote, - 0, - 0, - Block.Flags.container_opener, - ); - }, - else => unreachable, - } - } - } - fn pushContainer(this: *MDParser, container: Container) !void { - try this.containers.push(this.allocator, container); - } - - fn processLeafBlock(this: *MDParser, comptime tag: Block.Tag, block: *Block) anyerror!void { - const BlockDetailType = comptime switch (tag) { - Block.Tag.h => Block.Header, - Block.Tag.code => Block.Code, - Block.Tag.table => Block.Table, - }; - - const is_in_tight_list = if (this.containers.len == 0) - false - else - !this.containers.ptr[this.containers.len - 1].is_loose; - - const detail: BlockDetailType = switch (comptime tag) { - Block.Tag.h => @truncate(Block.Header, block.data), - Block.Tag.code => try this.setupFencedCodeDetail(block), - Block.Tag.table => .{ - .col_count = block.data, - .head_row_count = 1, - .body_row_count = block.line_count -| 2, - }, - else => {}, - }; - - if (!is_in_tight_list or comptime tag != .p) { - try this.mdx.onEnterBlock(block.tag, BlockDetailType, detail); - } - - defer { - if (comptime tag == Block.Tag.code) {} - } - } - - fn pushContainerBytes(this: *MDParser, block_type: Block.Tag, start: u32, data: u32, flag: Block.Flags) !void { - try this.endCurrentBlock(); - var block = Block{ - .tag = block_type, - .line_count = start, - .data = data, - }; - block.flags.insert(flag); - var prev_block: ?Block = null; - if (this.current_block) |curr| { - prev_block = curr.*; - } - - try this.blocks.push(this.allocator, block); - if (prev_block != null) { - this.current_block = this.blocks.ptr[this.current_block_index]; - } - } - fn processBlock(this: *MDParser, comptime tag: Block.Tag, block: *Block) !void { - const detail: Block.Detail = - switch (comptime tag) { - .ul => Block.Detail{ - .ul = .{ - .is_tight = !block.flags.contains(.loose_list), - .mark = @truncate(u8, block.data), - }, - }, - .ol => Block.Detail{ - .ol = .{ - .start = block.line_count, - .is_tight = !block.flags.contains(.loose_list), - .mark_delimiter = @truncate(u8, block.data), - }, - }, - .li => Block.Detail{ - .li = .{ - .is_task = block.data != 0, - .task_mark = @truncate(u8, block.data), - .task_mark_offset = @intCast(u32, block.line_count), - }, - }, - else => Block.Detail{ .none = .{} }, - }; - - if (block.flags.contains(.container)) { - if (block.flags.contains(.container_closer)) { - switch (block.tag) { - .li => try this.mdx.onLeaveBlock(tag, Block.LI, detail.li), - .ul => try this.mdx.onLeaveBlock(tag, Block.UL, detail.ul), - .ol => try this.mdx.onLeaveBlock(tag, Block.OL, detail.ol), - else => try this.mdx.onLeaveBlock(block.tag, void, {}), - } - this.containers.len -|= switch (block.tag) { - .ul, .ol, .blockquote => 1, - else => 0, - }; - } - - if (block.flags.contains(.container_opener)) { - switch (comptime tag) { - .li => try this.mdx.onEnterBlock(tag, Block.LI, detail.li), - .ul => try this.mdx.onEnterBlock(tag, Block.UL, detail.ul), - .ol => try this.mdx.onEnterBlock(tag, Block.OL, detail.ol), - else => try this.mdx.onEnterBlock(block.tag, void, {}), - } - - switch (comptime tag) { - .ul, .ol => { - this.containers.ptr[this.containers.len].is_loose = block.flags.contains(.loose_list); - this.containers.len += 1; - }, - .blockquote => { - // This causes that any text in a block quote, even if - // nested inside a tight list item, is wrapped with - // <p>...</p>. */ - this.containers.ptr[this.containers.len].is_loose = true; - this.containers.len += 1; - }, - else => {}, - } - } - } else { - try this.processLeafBlock(tag, block); - } - } - fn processAllBlocks(this: *MDParser) !void { - _ = this; - - // ctx->containers now is not needed for detection of lists and list items - // so we reuse it for tracking what lists are loose or tight. We rely - // on the fact the vector is large enough to hold the deepest nesting - // level of lists. - this.containers.len = 0; - var blocks = this.blocks.slice(); - for (&blocks) |*block| {} - } - fn isContainerCompatible(pivot: *const Container, container: *const Container) bool { - // Block quote has no "items" like lists. - if (container.ch == '>') return false; - - if (container.ch != pivot.ch) - return false; - - if (container.mark_indent > pivot.contents_indent) - return false; - return true; - } - - fn isHRLine(this: *MDParser, beg: u32, end: *u32, hr_killer: *u32) bool { - var off = beg + 1; - var n: u32 = 1; - - while (off < this.size and (this.charAt(off) == this.charAt(beg) or this.charAt(off) == ' ' or this.charAt(off) == '\t')) { - if (this.charAt(off) == this.charAt(beg)) - n += 1; - off += 1; - } - - if (n < 3) { - hr_killer.* = off; - return false; - } - - // Nothing else can be present on the line. */ - if (off < this.size and !this.isNewline(off)) { - hr_killer.* = off; - return false; - } - - end.* = off; - return true; - } - - fn isSetextUnderline(this: *MDParser, beg: u32, end: *u32, level: *u4) bool { - var off = beg + 1; - while (off < this.size and this.charAt(off) == this.charAt(beg)) - off += 1; - - // Optionally, space(s) can follow. */ - while (off < this.size and this.charAt(off) == ' ') - off += 1; - - // But nothing more is allowed on the line. - if (off < this.size and !this.isNewline(off)) - return false; - level.* = if (this.charAt(beg) == '=') 1 else 2; - end.* = off; - return true; - } - - fn isATXHeaderLine(this: *MDParser, beg: u32, p_beg: *u32, end: *u32, level: *u4) bool { - var n: i32 = undefined; - var off: u32 = beg + 1; - - while (off < this.size and this.charAt(off) == '#' and off - beg < 7) { - off += 1; - } - n = off - beg; - - if (n > 6) - return false; - level.* = @intCast(u4, n); - - if (!(this.flags.contains(.permissive_atxheaders)) and off < this.size and - this.charAt(off) != ' ' and this.charAt(off) != '\t' and !this.isNewline(off)) - return false; - - while (off < this.size and this.charAt(off) == ' ') { - off += 1; - } - - p_beg.* = off; - end.* = off; - - return true; - } - - fn isTableUnderline(this: *MDParser, beg: u32, end: *u32, column_column: *u32) bool { - _ = this; - _ = end; - _ = column_column; - - var off = beg; - var found_pipe = false; - var col_count: u32 = 0; - - if (off < this.size and this.charAt(off) == '|') { - found_pipe = true; - off += 1; - while (off < this.size and isWhitespace(this.charAt(off))) { - off += 1; - } - } - - while (true) { - var delimited = false; - - // Cell underline ("-----", ":----", "----:" or ":----:")if(off < this.size and this.charAt(off) == _T(':')) - off += 1; - if (off >= this.size or this.charAt(off) != '-') - return false; - while (off < this.size and this.charAt(off) == '-') - off += 1; - if (off < this.size and this.charAt(off) == ':') - off += 1; - - col_count += 1; - - // Pipe delimiter (optional at the end of line). */ - while (off < this.size and isWhitespace(this.charAt(off))) - off += 1; - if (off < this.size and this.charAt(off) == '|') { - delimited = true; - found_pipe = true; - off += 1; - while (off < this.size and isWhitespace(this.charAt(off))) - off += 1; - } - - // Success, if we reach end of line. - if (off >= this.size or this.isNewline(off)) - break; - - if (!delimited) - return false; - } - - if (!found_pipe) - return false; - - column_column.* = col_count; - end.* = off; - return true; - } - - fn isOpeningCodeFence(this: *MDParser, beg: u8, end: *u32) bool { - var off = beg; - const first = this.charAt(beg); - - while (off < this.size and this.charAt(off) == first) { - off += 1; - } - - // Fence must have at least three characters. - if (off - beg < 3) - return false; - - // Optionally, space(s) can follow - while (off < this.size and this.charAt(off) == ' ') { - off += 1; - } - - // Optionally, an info string can follow. - while (off < this.size and !this.isNewline(this.charAt(off))) { - // Backtick-based fence must not contain '`' in the info string. - if (first == '`' and this.charAt(off) == '`') - return false; - off += 1; - } - - end.* = off; - return true; - } - - fn isClosingCodeFence(this: *MDParser, ch: u8, beg: u8, end: *u32) bool { - var off = beg; - - defer { - end.* = off; - } - - while (off < this.size and this.charAt(off) == ch) { - off += 1; - } - - if (off - beg < this.code_fence_length) { - return false; - } - - // Optionally, space(s) can follow - while (off < this.size and this.charAt(off) == ' ') { - off += 1; - } - - // But nothing more is allowed on the line. - if (off < this.size and !this.isNewline(this.charAt(off))) - return false; - - return true; - } - - pub fn parse(this: *MDParser) anyerror!void { - var pivot_line = &Line.Analysis.blank; - var line_buf: [2]Line.Analysis = undefined; - var line = &line_buf[0]; - var offset: u32 = 0; - - try this.mdx.onEnterBlock(.doc, void, {}); - - const len: u32 = this.size; - while (offset < len) { - if (line == pivot_line) { - line = if (line == &line_buf[0]) &line_buf[1] else &line_buf[0]; - } - - try this.analyzeLine(offset, &offset, pivot_line, line); - try this.processLine(&pivot_line, line); - } - - this.endCurrentBlock(); - - try this.buildRefDefHashTable(); - - this.leaveChildContainers(0); - this.processAllBlocks(); - try this.mdx.onLeaveBlock(.doc, void, {}); - } -}; - -pub const MDX = struct { - parser: JSParser, - log: *logger.Log, - allocator: std.mem.Allocator, - stmts: std.ArrayListUnmanaged(js_ast.Stmt) = .{}, - - pub const Options = struct {}; - - pub fn onEnterBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void { - _ = tag; - _ = detail; - _ = this; - } - - pub fn onLeaveBlock(this: *MDX, tag: Block.Tag, comptime Detail: type, detail: Detail) anyerror!void { - _ = tag; - _ = detail; - _ = this; - } - - pub fn onEnterSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void { - _ = tag; - _ = detail; - _ = this; - } - - pub fn onLeaveSpan(this: *MDX, tag: Span.Tag, comptime Detail: type, detail: Detail) anyerror!void { - _ = tag; - _ = detail; - _ = this; - } - - pub fn onText(this: *MDX, tag: Text, text: []const u8) anyerror!void { - _ = tag; - _ = text; - _ = this; - } - - pub inline fn source(p: *const MDX) *const logger.Source { - return &p.lexer.source; - } - - pub fn e(_: *MDX, t: anytype, loc: logger.Loc) Expr { - const Type = @TypeOf(t); - if (@typeInfo(Type) == .Pointer) { - return Expr.init(std.meta.Child(Type), t.*, loc); - } else { - return Expr.init(Type, t, loc); - } - } - - pub fn s(_: *MDX, t: anytype, loc: logger.Loc) Stmt { - const Type = @TypeOf(t); - if (@typeInfo(Type) == .Pointer) { - return Stmt.init(std.meta.Child(Type), t.*, loc); - } else { - return Stmt.alloc(Type, t, loc); - } - } - - pub fn setup( - this: *MDX, - _options: ParserOptions, - log: *logger.Log, - source_: *const logger.Source, - define: *Define, - allocator: std.mem.Allocator, - ) !void { - try JSParser.init( - allocator, - log, - source_, - define, - js_lexer.Lexer.initNoAutoStep(log, source_.*, allocator), - _options, - &this.parser, - ); - this.lexer = try Lexer.init(&this.parser.lexer); - this.allocator = allocator; - this.log = log; - this.stmts = .{}; - } - - pub fn parse(this: *MDX) !js_ast.Result { - try this._parse(); - return try runVisitPassAndFinish(JSParser, &this.parser, try this.stmts.toOwnedSlice(this.allocator)); - } - - fn run(this: *MDX) anyerror!logger.Loc { - _ = this; - return logger.Loc.Empty; - } - - fn _parse(this: *MDX) anyerror!void { - var root_children = std.ArrayListUnmanaged(Expr){}; - var first_loc = try run(this, &root_children); - - first_loc.start = @max(first_loc.start, 0); - const args_loc = first_loc; - first_loc.start += 1; - const body_loc = first_loc; - - // We need to simulate a function that was parsed - _ = try this.parser.pushScopeForParsePass(.function_args, args_loc); - - _ = try this.parser.pushScopeForParsePass(.function_body, body_loc); - - const root = this.e(E.JSXElement{ - .tag = this.e(E.JSXElement.Tag.map.get(E.JSXElement.Tag.main), body_loc), - .children = ExprNodeList.fromList(root_children), - }, body_loc); - - var root_stmts = try this.allocator.alloc(Stmt, 1); - root_stmts[0] = this.s(S.Return{ .value = root }, body_loc); - - try this.stmts.append( - this.allocator, - - this.s(S.ExportDefault{ - .default_name = try this.parser.createDefaultName(args_loc), - .value = .{ - .expr = this.e(E.Arrow{ - .body = G.FnBody{ - .stmts = root_stmts, - .loc = body_loc, - }, - .args = &[_]G.Arg{}, - .prefer_expr = true, - }, args_loc), - }, - }, args_loc), - ); - } -}; |