2 files changed, 372 insertions, 10 deletions
diff --git a/src/js_ast.zig b/src/js_ast.zig
index e2717a752..965c17b46 100644
--- a/src/js_ast.zig
+++ b/src/js_ast.zig
@@ -268,6 +268,13 @@ pub fn BabyList(comptime Type: type) type {
             this.update(list_);
         }
 
+        /// Appends `value` to the list and returns a pointer to the slot it
+        /// now occupies.
+        ///
+        /// The pointer aims into `this.ptr`; treat it as invalidated by any
+        /// later push that may reallocate the storage. Allocation failure is
+        /// returned to the caller.
+        pub fn fetchPush(this: *ListType, allocator: std.mem.Allocator, value: Type) OOM!*Type {
+            var list_ = this.list();
+            try list_.append(allocator, value);
+            this.update(list_);
+            // Take the address of the new element: plain indexing yields a
+            // `Type` value, which does not match the `*Type` return type.
+            return &this.ptr[list_.items.len - 1];
+        }
+
         pub inline fn slice(this: ListType) []Type {
             @setRuntimeSafety(false);
             return this.ptr[0..this.len];
diff --git a/src/mdx/mdx_parser.zig b/src/mdx/mdx_parser.zig
index 6b35e638e..93dee1980 100644
--- a/src/mdx/mdx_parser.zig
+++ b/src/mdx/mdx_parser.zig
@@ -71,8 +71,17 @@ pub const Block = struct {
     /// OL:     Start item number.
     ///
     line_count: u32 = 0,
+    line_offset: u32 = 0,
     detail: Block.Detail = Block.Detail{ .none = .{} },
 
+    /// Returns this block's window into `lines_`: `line_count` entries
+    /// starting at index `line_offset`.
+    pub inline fn lines(this: Block, lines_: BabyList(Line)) []Line {
+        const first = this.line_offset;
+        const past_last = first + this.line_count;
+        return lines_.ptr[first..past_last];
+    }
+
+    /// Returns this block's window into the verbatim line list `lines_`:
+    /// `line_count` entries starting at index `line_offset`.
+    pub inline fn verbatimLines(this: Block, lines_: BabyList(Line.Verbatim)) []Line.Verbatim {
+        const first = this.line_offset;
+        const past_last = first + this.line_count;
+        return lines_.ptr[first..past_last];
+    }
+
     pub const Data = u32;
 
     pub const Flags = enum(u3) {
@@ -159,9 +168,7 @@ pub const Block = struct {
         task_mark_off: u32 = 0,
     };
 
-    pub const Header = struct {
-        level: u3 = 0,
-    };
+    /// Detail payload for `h` blocks: the heading level, stored as a bare
+    /// integer rather than wrapped in a single-field struct.
+    pub const Header = u4;
 
     pub const Code = struct {
         info: Attribute = .{},
@@ -322,6 +329,7 @@ pub const Substring = struct {
     tag: Text,
 
     pub const List = std.MultiArrayList(Substring);
+    pub const ListPool = ObjectPool(List);
 };
 
 pub const Mark = struct {
@@ -454,9 +462,13 @@ pub const MDParser = struct {
     doc_ends_with_newline: bool = false,
     size: u32 = 0,
 
+    lines: BabyList(Line) = .{},
+    verbatim_lines: BabyList(Line.Verbatim) = .{},
+
     containers: BabyList(Container) = .{},
     blocks: BabyList(Block) = .{},
     current_block: ?*Block = null,
+    current_block_index: u32 = 0,
 
     code_fence_length: u32 = 0,
     code_indent_offset: u32 = std.math.maxInt(u32),
@@ -564,9 +576,26 @@ pub const MDParser = struct {
         return parser;
     }
 
-    fn startNewBlock(this: *MDParser, line: *Line.Analysis) !void {
-        _ = this;
-        _ = line;
+    /// Appends a fresh, empty block for the line described by `line`.
+    ///
+    /// The block tag is derived from the line tag, and `line_offset` is seeded
+    /// with the current length of whichever line list the line tag selects
+    /// (`verbatim_lines` for code and HTML lines, `lines` otherwise). Line tags
+    /// that cannot open a block hit `unreachable`. Allocation failure from the
+    /// push is returned to the caller.
+    ///
+    /// NOTE(review): this only pushes onto `this.blocks`; `current_block` is
+    /// not assigned here — confirm the caller updates it.
+    fn startNewBlock(this: *MDParser, line: *const Line.Analysis) !void {
+        const block_tag: Block.Tag = switch (line.tag) {
+            .hr => .hr,
+            .atx_header, .setext_header => .h,
+            .fenced_code, .indented_code => .code,
+            .text => .p,
+            .html => .html,
+            else => unreachable,
+        };
+
+        const uses_verbatim_lines = switch (line.tag) {
+            .indented_code, .html, .fenced_code => true,
+            else => false,
+        };
+        const first_line = if (uses_verbatim_lines) this.verbatim_lines.len else this.lines.len;
+
+        try this.blocks.push(this.allocator, Block{
+            .tag = block_tag,
+            .data = line.data,
+            .line_count = 0,
+            .line_offset = first_line,
+        });
     }
 
     inline fn charAt(this: *const MDParser, index: u32) u8 {
@@ -1137,7 +1166,7 @@ pub const MDParser = struct {
             },
             // MD_LINE_TABLEUNDERLINE changes meaning of the current block.
             .table_underline => {
-                var current_block = &this.current_block.?;
+                var current_block = this.current_block.?;
                 std.debug.assert(current_block.line_count == 1);
                 current_block.tag = .table;
                 current_block.data = line.data;
@@ -1162,8 +1191,77 @@ pub const MDParser = struct {
             },
         }
     }
+    /// Not implemented yet: the body only discards `this` and returns.
+    /// `endCurrentBlock` calls it before closing a block.
+    fn consumeLinkReferenceDefinitions(this: *MDParser) !void {
+        _ = this;
+    }
+    /// Appends the line described by `analysis` to the block currently being
+    /// built and increments that block's `line_count`.
+    ///
+    /// `code` and `html` blocks store `Line.Verbatim` entries (which keep the
+    /// indent) in `verbatim_lines`; every other block stores begin/end offsets
+    /// in `lines`. The first line added to a block also pins the block's
+    /// `line_offset` to the current end of the list it uses.
+    ///
+    /// Requires `current_block` to be non-null. Allocation failures from
+    /// either list are returned to the caller.
+    fn addLineIntoCurrentBlock(this: *MDParser, analysis: *const Line.Analysis) !void {
+        const current_block = this.current_block.?;
+
+        switch (current_block.tag) {
+            .code, .html => {
+                // A block's lines must sit at the very end of the list.
+                if (current_block.line_count > 0)
+                    std.debug.assert(
+                        this.verbatim_lines.len == current_block.line_count + current_block.line_offset,
+                    );
+                if (current_block.line_count == 0) {
+                    current_block.line_offset = this.verbatim_lines.len;
+                }
+
+                try this.verbatim_lines.push(this.allocator, Line.Verbatim{
+                    .indent = analysis.indent,
+                    .line = .{
+                        .beg = analysis.beg,
+                        .end = analysis.end,
+                    },
+                });
+            },
+            else => {
+                // A block's lines must sit at the very end of the list.
+                if (current_block.line_count > 0)
+                    std.debug.assert(
+                        this.lines.len == current_block.line_count + current_block.line_offset,
+                    );
+                if (current_block.line_count == 0) {
+                    current_block.line_offset = this.lines.len;
+                }
+                // `push` returns an error union; it has to be propagated just
+                // like the verbatim branch does.
+                try this.lines.push(this.allocator, .{ .beg = analysis.beg, .end = analysis.end });
+            },
+        }
+
+        current_block.line_count += 1;
+    }
     fn endCurrentBlock(this: *MDParser) !void {
         _ = this;
+
+        var block = this.current_block orelse return;
+        // Check whether there is a reference definition. (We do this here instead
+        // of in md_analyze_line() because reference definition can take multiple
+        // lines.) */
+        if ((block.tag == .p or block.tag == .h) and block.flags.contains(.setext_header)) {
+            var lines = block.lines(this.lines);
+            if (lines[0].beg == '[') {
+                try this.consumeLinkReferenceDefinitions();
+                block = this.current_block orelse return;
+            }
+        }
+
+        if (block.tag == .h and block.flags.contains(.setext_header)) {
+            var n_lines = block.line_count;
+            if (n_lines > 1) {
+                // get rid of the underline
+                if (this.lines.len == block.line_count + block.line_offset) {
+                    this.lines.len -= 1;
+                }
+                block.line_count -= 1;
+            } else {
+                // Only the underline has left after eating the ref. defs.
+                // Keep the line as beginning of a new ordinary paragraph. */
+                block.tag = .p;
+            }
+        }
+
+        // Mark we are not building any block anymore.
+        this.current_block = null;
+        this.current_block_index -|= 1;
     }
     fn buildRefDefHashTable(this: *MDParser) !void {
         _ = this;
@@ -1217,7 +1315,7 @@ pub const MDParser = struct {
                     is_ordered_list = true;
                 },
                 '-', '+', '*' => {
-                    //  Remember offset in ctx->block_bytes so we can revisit the
+                    //  Remember offset in ctx.block_bytes so we can revisit the
                     // block if we detect it is a loose list.
                     try this.endCurrentBlock();
                     c.block_index = this.blocks.len;
@@ -1250,8 +1348,160 @@ pub const MDParser = struct {
+    /// Appends `container` to `this.containers` using the parser's allocator;
+    /// an error from the push is returned to the caller.
     fn pushContainer(this: *MDParser, container: Container) !void {
         try this.containers.push(this.allocator, container);
     }
+
+    /// Untagged union with one field per detail payload a leaf block can
+    /// carry: nothing, `Block.Header`, `Block.Code` or `Block.Table`.
+    const LeafBlockDetail = union {
+        none: void,
+        h: Block.Header,
+        code: Block.Code,
+        table: Block.Table,
+    };
+
+    /// Emits the enter-block event for a leaf block whose tag is known at
+    /// compile time.
+    ///
+    /// `tag` selects the detail type passed to `this.mdx.onEnterBlock`:
+    /// `Block.Header` for `.h`, `Block.Code` for `.code`, `Block.Table` for
+    /// `.table`, and `void` for every other tag.
+    ///
+    /// The event is skipped for `.p` blocks when the innermost tracked
+    /// container is not loose. Errors from `setupFencedCodeDetail` and
+    /// `onEnterBlock` are propagated.
+    fn processLeafBlockWithType(this: *MDParser, comptime tag: Block.Tag, block: *Block) anyerror!void {
+        // Detail payload type handed to the callback for this tag.
+        const BlockDetailType = comptime switch (tag) {
+            Block.Tag.h => Block.Header,
+            Block.Tag.code => Block.Code,
+            Block.Tag.table => Block.Table,
+            else => void,
+        };
+
+        // With no tracked containers the block cannot be in a tight list;
+        // otherwise the innermost container's `is_loose` decides.
+        const is_in_tight_list = if (this.containers.len == 0)
+            false
+        else
+            !this.containers.ptr[this.containers.len - 1].is_loose;
+
+        const detail: BlockDetailType = switch (comptime tag) {
+            Block.Tag.h => @truncate(Block.Header, block.data),
+            Block.Tag.code => try this.setupFencedCodeDetail(block),
+            Block.Tag.table => .{
+                .col_count = block.data,
+                .head_row_count = 1,
+                // Saturating subtraction: fewer than two lines yields 0.
+                // Presumably the two lines are the header row and the
+                // underline — TODO confirm.
+                .body_row_count = block.line_count -| 2,
+            },
+            else => void{},
+        };
+
+        // Paragraphs inside a tight list do not get an enter event.
+        if (!is_in_tight_list or comptime tag != .p) {
+            try this.mdx.onEnterBlock(block.tag, BlockDetailType, detail);
+        }
+
+        // Placeholder: nothing is done on scope exit yet, even for code blocks.
+        defer {
+            if (comptime tag == Block.Tag.code) {}
+        }
+    }
+    /// Runtime-to-comptime dispatch: forwards `block` to the
+    /// `processLeafBlockWithType` instantiation that matches `block.tag`.
+    /// Any error from that call is returned unchanged.
+    fn processLeafBlock(this: *MDParser, block: *Block) anyerror!void {
+        const Tag = Block.Tag;
+        switch (block.tag) {
+            .doc => return this.processLeafBlockWithType(Tag.doc, block),
+            .quote => return this.processLeafBlockWithType(Tag.quote, block),
+            .ul => return this.processLeafBlockWithType(Tag.ul, block),
+            .ol => return this.processLeafBlockWithType(Tag.ol, block),
+            .li => return this.processLeafBlockWithType(Tag.li, block),
+            .hr => return this.processLeafBlockWithType(Tag.hr, block),
+            .h => return this.processLeafBlockWithType(Tag.h, block),
+            .code => return this.processLeafBlockWithType(Tag.code, block),
+            .html => return this.processLeafBlockWithType(Tag.html, block),
+            .p => return this.processLeafBlockWithType(Tag.p, block),
+            .table => return this.processLeafBlockWithType(Tag.table, block),
+            .thead => return this.processLeafBlockWithType(Tag.thead, block),
+            .tbody => return this.processLeafBlockWithType(Tag.tbody, block),
+            .tr => return this.processLeafBlockWithType(Tag.tr, block),
+            .th => return this.processLeafBlockWithType(Tag.th, block),
+            .td => return this.processLeafBlockWithType(Tag.td, block),
+        }
+    }
+    /// Ends the block in progress, then appends a container-boundary block.
+    ///
+    /// - `block_type`: tag of the appended block.
+    /// - `start`: stored in the appended block's `line_count` field.
+    /// - `data`: stored in the appended block's `data` field.
+    /// - `flag`: inserted into the appended block's flag set.
+    ///
+    /// If a block is still current after `endCurrentBlock`, `current_block`
+    /// is re-derived from `current_block_index` once the push is done
+    /// (presumably because the push may move the backing storage).
+    /// Errors from `endCurrentBlock` and from the push are propagated.
+    fn pushContainerBytes(this: *MDParser, block_type: Block.Tag, start: u32, data: u32, flag: Block.Flags) !void {
+        try this.endCurrentBlock();
+
+        var block = Block{
+            .tag = block_type,
+            .line_count = start,
+            .data = data,
+        };
+        block.flags.insert(flag);
+
+        // Only whether a block was current matters; no copy of it is needed.
+        const had_current_block = this.current_block != null;
+
+        try this.blocks.push(this.allocator, block);
+        if (had_current_block) {
+            // `current_block` is a `?*Block`, so store the element's address
+            // rather than the element itself.
+            this.current_block = &this.blocks.ptr[this.current_block_index];
+        }
+    }
     fn processAllBlocks(this: *MDParser) !void {
         _ = this;
+
+        // ctx->containers now is not needed for detection of lists and list items
+        // so we reuse it for tracking what lists are loose or tight. We rely
+        // on the fact the vector is large enough to hold the deepest nesting
+        // level of lists.
+        this.containers.len = 0;
+        var blocks = this.blocks.slice();
+        for (blocks) |*block| {
+            const detail: Block.Detail =
+                switch (block.tag) {
+                .ul => Block.Detail{
+                    .ul = .{
+                        .is_tight = !block.flags.contains(.loose_list),
+                        .mark = @truncate(u8, block.data),
+                    },
+                },
+                .ol => Block.Detail{
+                    .ol = .{
+                        .start = block.line_count,
+                        .is_tight = !block.flags.contains(.loose_list),
+                        .mark_delimiter = @truncate(u8, block.data),
+                    },
+                },
+                .li => Block.Detail{
+                    .li = .{
+                        .is_task = block.data != 0,
+                        .task_mark = @truncate(u8, block.data),
+                        .task_mark_offset = @intCast(u32, block.line_count),
+                    },
+                },
+                else => Block.Detail{ .none = .{} },
+            };
+
+            if (block.flags.contains(.container)) {
+                if (block.flags.contains(.container_closer)) {
+                    switch (block.tag) {
+                        .li => try this.mdx.onLeaveBlock(block.tag, Block.LI, detail.li),
+                        .ul => try this.mdx.onLeaveBlock(block.tag, Block.UL, detail.ul),
+                        .ol => try this.mdx.onLeaveBlock(block.tag, Block.OL, detail.ol),
+                        else => try this.mdx.onLeaveBlock(block.tag, void, void{}),
+                    }
+                    this.containers.len -|= switch (block.tag) {
+                        .ul, .ol, .blockquote => 1,
+                        else => 0,
+                    };
+                }
+
+                if (block.flags.contains(.container_opener)) {
+                    switch (block.tag) {
+                        .li => try this.mdx.onEnterBlock(block.tag, Block.LI, detail.li),
+                        .ul => try this.mdx.onEnterBlock(block.tag, Block.UL, detail.ul),
+                        .ol => try this.mdx.onEnterBlock(block.tag, Block.OL, detail.ol),
+                        else => try this.mdx.onEnterBlock(block.tag, void, void{}),
+                    }
+
+                    switch (block.tag) {
+                        .ul, .ol => {
+                            this.containers.ptr[this.containers.len].is_loose = block.flags.contains(.loose_list);
+                            this.containers.len += 1;
+                        },
+                        .blockquote => {
+                            //  This causes that any text in a block quote, even if
+                            // nested inside a tight list item, is wrapped with
+                            // <p>...</p>. */
+                            this.containers.ptr[this.containers.len].is_loose = true;
+                            this.containers.len += 1;
+                        },
+                        else => {},
+                    }
+                }
+            } else {
+                try this.processLeafBlock(block);
+            }
+        }
     }
     fn isContainerCompatible(pivot: *const Container, container: *const Container) bool {
         // Block quote has no "items" like lists.
@@ -1265,7 +1515,76 @@ pub const MDParser = struct {
         return true;
     }
 
-    pub fn isTableUnderline(this: *MDParser, beg: u32, end: *u32, column_column: *u32) bool {
+    /// Tests whether the line beginning at `beg` is a horizontal rule: at
+    /// least three copies of the character at `beg`, optionally interleaved
+    /// with spaces or tabs, and nothing else before the end of the line or of
+    /// the input.
+    ///
+    /// On success stores the offset just past the scanned run in `end.*` and
+    /// returns true. On failure stores the offset where scanning stopped in
+    /// `hr_killer.*` and returns false.
+    fn isHRLine(this: *MDParser, beg: u32, end: *u32, hr_killer: *u32) bool {
+        var cursor = beg + 1;
+        var marker_count: u32 = 1;
+
+        while (cursor < this.size) : (cursor += 1) {
+            const ch = this.charAt(cursor);
+            if (ch == this.charAt(beg)) {
+                marker_count += 1;
+            } else if (ch != ' ' and ch != '\t') {
+                break;
+            }
+        }
+
+        // Too few markers, or something other than a line break follows.
+        const too_few_markers = marker_count < 3;
+        if (too_few_markers or (cursor < this.size and !this.isNewline(cursor))) {
+            hr_killer.* = cursor;
+            return false;
+        }
+
+        end.* = cursor;
+        return true;
+    }
+
+    /// Tests whether the line beginning at `beg` is a setext header underline:
+    /// a run of the character at `beg`, optionally followed by spaces, and
+    /// then the end of the line or of the input.
+    ///
+    /// On success stores 1 in `level.*` when the character at `beg` is `=`
+    /// and 2 otherwise, stores the offset past the scanned text in `end.*`,
+    /// and returns true. Returns false without touching the outputs otherwise.
+    fn isSetextUnderline(this: *MDParser, beg: u32, end: *u32, level: *u4) bool {
+        var cursor = beg + 1;
+
+        // Run of the underline character itself.
+        while (cursor < this.size and this.charAt(cursor) == this.charAt(beg)) : (cursor += 1) {}
+
+        // Trailing spaces are tolerated.
+        while (cursor < this.size and this.charAt(cursor) == ' ') : (cursor += 1) {}
+
+        // Anything else before the line break disqualifies the line.
+        const at_line_end = cursor >= this.size or this.isNewline(cursor);
+        if (!at_line_end) return false;
+
+        level.* = switch (this.charAt(beg)) {
+            '=' => 1,
+            else => 2,
+        };
+        end.* = cursor;
+        return true;
+    }
+
+    /// Tests whether the line at `beg` opens an ATX header.
+    ///
+    /// Scanning starts at `beg + 1`, so the character at `beg` is counted as
+    /// the first marker. Runs longer than six markers are rejected. Unless the
+    /// `.permissive_atxheaders` flag is set, the run must be followed by a
+    /// space, a tab, a line break or the end of the input.
+    ///
+    /// On success stores the run length in `level.*`, stores the offset past
+    /// any spaces after the run in both `p_beg.*` and `end.*`, and returns
+    /// true.
+    fn isATXHeaderLine(this: *MDParser, beg: u32, p_beg: *u32, end: *u32, level: *u4) bool {
+        var off: u32 = beg + 1;
+
+        // Stop counting at seven so an over-long run can be told apart.
+        while (off < this.size and this.charAt(off) == '#' and off - beg < 7) {
+            off += 1;
+        }
+
+        // `off` and `beg` are both `u32`; keep the count unsigned, because a
+        // `u32` difference does not coerce to `i32`.
+        const n: u32 = off - beg;
+        if (n > 6)
+            return false;
+        level.* = @intCast(u4, n);
+
+        if (!(this.flags.contains(.permissive_atxheaders)) and off < this.size and
+            this.charAt(off) != ' ' and this.charAt(off) != '\t' and !this.isNewline(off))
+            return false;
+
+        while (off < this.size and this.charAt(off) == ' ') {
+            off += 1;
+        }
+
+        p_beg.* = off;
+        end.* = off;
+
+        return true;
+    }
+
+    fn isTableUnderline(this: *MDParser, beg: u32, end: *u32, column_column: *u32) bool {
         _ = this;
         _ = end;
         _ = column_column;
@@ -1277,14 +1596,50 @@ pub const MDParser = struct {
         if (off < this.size and this.charAt(off) == '|') {
             found_pipe = true;
             off += 1;
-            while (off < this.size and this.charAt(off) == ' ') {
+            while (off < this.size and isWhitespace(this.charAt(off))) {
                 off += 1;
             }
         }
 
         while (true) {
             var delimited = false;
+
+            // Cell underline ("-----", ":----", "----:" or ":----:")if(off < this.size  and  this.charAt(off) == _T(':'))
+            off += 1;
+            if (off >= this.size or this.charAt(off) != '-')
+                return false;
+            while (off < this.size and this.charAt(off) == '-')
+                off += 1;
+            if (off < this.size and this.charAt(off) == ':')
+                off += 1;
+
+            col_count += 1;
+
+            // Pipe delimiter (optional at the end of line). */
+            while (off < this.size and isWhitespace(this.charAt(off)))
+                off += 1;
+            if (off < this.size and this.charAt(off) == '|') {
+                delimited = true;
+                found_pipe = true;
+                off += 1;
+                while (off < this.size and isWhitespace(this.charAt(off)))
+                    off += 1;
+            }
+
+            // Success, if we reach end of line.
+            if (off >= this.size or this.isNewline(off))
+                break;
+
+            if (!delimited)
+                return false;
         }
+
+        if (!found_pipe)
+            return false;
+
+        column_column.* = col_count;
+        end.* = off;
+        return true;
     }
 
     fn isOpeningCodeFence(this: *MDParser, beg: u8, end: *u32) bool {