From 33372697e5f5d1848582488c25a036fa387b60e5 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 01/30] sourcepos: fix(?) HtmlInline. Pretty unsure about the "+ extra" but we'll see! --- src/parser/inlines.rs | 39 ++++++++++++++++++++++++--------------- src/tests/sourcepos.rs | 1 - 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 0e1d0d46..cfe2bb54 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -191,10 +191,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { let new_inl: Option<&'a AstNode<'a>> = match c { '\0' => return false, '\r' | '\n' => Some(self.handle_newline()), - '`' => Some(self.handle_backticks()), + '`' => Some(self.handle_backticks(&node_ast.line_offsets)), '\\' => Some(self.handle_backslash()), '&' => Some(self.handle_entity()), - '<' => Some(self.handle_pointy_brace()), + '<' => Some(self.handle_pointy_brace(&node_ast.line_offsets)), ':' => { let mut res = None; @@ -288,7 +288,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { '^' if self.options.extension.superscript && !self.within_brackets => { Some(self.handle_delim(b'^')) } - '$' => Some(self.handle_dollars()), + '$' => Some(self.handle_dollars(&node_ast.line_offsets)), '|' if self.options.extension.spoiler => Some(self.handle_delim(b'|')), _ => { let endpos = self.find_special_char(); @@ -603,11 +603,13 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.input.len() } - fn adjust_node_newlines(&mut self, node: &'a AstNode<'a>, matchlen: usize, extra: usize) { - if !self.options.render.sourcepos { - return; - } - + fn adjust_node_newlines( + &mut self, + node: &'a AstNode<'a>, + matchlen: usize, + extra: usize, + parent_line_offsets: &[usize], + ) { let (newlines, since_newline) = count_newlines(&self.input[self.pos - matchlen - extra..self.pos - extra]); @@ -615,7 +617,9 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> 
Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.line += newlines; let node_ast = &mut node.data.borrow_mut(); node_ast.sourcepos.end.line += newlines; - node_ast.sourcepos.end.column = since_newline; + let adjusted_line = self.line - node_ast.sourcepos.start.line; + node_ast.sourcepos.end.column = + parent_line_offsets[adjusted_line] + since_newline + extra; self.column_offset = -(self.pos as isize) + since_newline as isize + extra as isize; } } @@ -684,7 +688,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn handle_backticks(&mut self) -> &'a AstNode<'a> { + pub fn handle_backticks(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { let openticks = self.take_while(b'`'); let startpos = self.pos; let endpos = self.scan_to_closing_backtick(openticks); @@ -703,7 +707,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { }; let node = self.make_inline(NodeValue::Code(code), startpos, endpos - openticks - 1); - self.adjust_node_newlines(node, endpos - startpos, openticks); + self.adjust_node_newlines(node, endpos - startpos, openticks, parent_line_offsets); node } } @@ -781,7 +785,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } // Heuristics used from https://pandoc.org/MANUAL.html#extension-tex_math_dollars - pub fn handle_dollars(&mut self) -> &'a AstNode<'a> { + pub fn handle_dollars(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { if self.options.extension.math_dollars || self.options.extension.math_code { let opendollars = self.take_while(b'$'); let mut code_math = false; @@ -849,7 +853,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { startpos, endpos - fence_length - 1, ); - self.adjust_node_newlines(node, endpos - startpos, fence_length); + self.adjust_node_newlines( + node, + endpos - startpos, + fence_length, + parent_line_offsets, + ); node } } @@ -1332,7 +1341,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { 
self.handle_autolink_with(node, autolink::www_match) } - pub fn handle_pointy_brace(&mut self) -> &'a AstNode<'a> { + pub fn handle_pointy_brace(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { self.pos += 1; if let Some(matchlen) = scanners::autolink_uri(&self.input[self.pos..]) { @@ -1426,7 +1435,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos - matchlen - 1, self.pos - 1, ); - self.adjust_node_newlines(inl, matchlen, 1); + self.adjust_node_newlines(inl, matchlen, 1, parent_line_offsets); return inl; } diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index a18da6b3..d66bb83c 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -418,7 +418,6 @@ fn node_values() -> HashMap { | DescriptionItem // end is 4:0 | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 - | HtmlInline // end is 1:31 but should be 3:14 | LineBreak // start is 1:15 but should be 1:13 | Code // is 1:8-1:12 but should be 1:7-1:13 | ThematicBreak // end is 4:0 From 4729f60f655bfd39af021e01c53f52222aad4d4e Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 02/30] math: fix sourcepos test, mark #[ignore]. For consistency with other inlines, we should include the opening and closing $ / $$ / $`; there shouldn't be characters in the source document not covered by sourcepos. 
--- src/tests/math.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tests/math.rs b/src/tests/math.rs index 4136bc76..5920c50d 100644 --- a/src/tests/math.rs +++ b/src/tests/math.rs @@ -134,6 +134,7 @@ fn math_unrecognized_syntax_non_roundtrip(markdown: &str, html: &str) { } #[test] +#[ignore] fn sourcepos() { assert_ast_match!( [extension.math_dollars, extension.math_code], @@ -148,14 +149,14 @@ fn sourcepos() { "```\n", (document (1:1-9:3) [ (paragraph (1:1-1:29) [ - (math (1:2-1:4)) + (math (1:1-1:5)) (text (1:6-1:10) " and ") - (math (1:13-1:15)) + (math (1:11-1:17)) (text (1:18-1:22) " and ") - (math (1:25-1:27)) + (math (1:23-1:29)) ]) (paragraph (3:1-5:2) [ - (math (3:3-5:0)) + (math (3:1-5:2)) ]) (code_block (7:1-9:3)) ]) From 062865ca55d570c94855d9a5cb7ea3e9f1278200 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 03/30] inlines: fix Code sourcepos. --- src/parser/inlines.rs | 22 +++++++++++++++------- src/tests/regressions.rs | 13 +++++++++++++ src/tests/sourcepos.rs | 1 - 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index cfe2bb54..ab8483f6 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -689,25 +689,33 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } pub fn handle_backticks(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { - let openticks = self.take_while(b'`'); let startpos = self.pos; + let openticks = self.take_while(b'`'); let endpos = self.scan_to_closing_backtick(openticks); match endpos { None => { - self.pos = startpos; - self.make_inline(NodeValue::Text("`".repeat(openticks)), self.pos, self.pos) + self.pos = startpos + openticks; + self.make_inline( + NodeValue::Text("`".repeat(openticks)), + startpos, + self.pos - 1, + ) } Some(endpos) => { - let buf = &self.input[startpos..endpos - openticks]; + let buf = &self.input[startpos + openticks..endpos - 
openticks]; let buf = strings::normalize_code(buf); let code = NodeCode { num_backticks: openticks, literal: String::from_utf8(buf).unwrap(), }; - let node = - self.make_inline(NodeValue::Code(code), startpos, endpos - openticks - 1); - self.adjust_node_newlines(node, endpos - startpos, openticks, parent_line_offsets); + let node = self.make_inline(NodeValue::Code(code), startpos, endpos - 1); + self.adjust_node_newlines( + node, + endpos - startpos - openticks, + openticks, + parent_line_offsets, + ); node } } diff --git a/src/tests/regressions.rs b/src/tests/regressions.rs index f0827134..a2456aac 100644 --- a/src/tests/regressions.rs +++ b/src/tests/regressions.rs @@ -133,3 +133,16 @@ fn sourcepos_para() { fn gemoji() { html_opts!([extension.shortcodes], ":x:", "

\n"); } + +#[test] +fn sourcepos_lone_backtick() { + assert_ast_match!( + [], + "``\n", + (document (1:1-1:2) [ + (paragraph (1:1-1:2) [ + (text (1:1-1:2) "``") + ]) + ]) + ); +} diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index d66bb83c..8f138c3a 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -419,7 +419,6 @@ fn node_values() -> HashMap { | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 | LineBreak // start is 1:15 but should be 1:13 - | Code // is 1:8-1:12 but should be 1:7-1:13 | ThematicBreak // end is 4:0 | Link // inconsistent between link types | Math // is 3:2-3:6 but should be 3:1-3:7 From 888e521f93fd021e04eab01cdde84f60248a8a3e Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 04/30] inlines: fix (one) LineBreak sourcepos. Need to check the other kind. --- src/parser/inlines.rs | 2 +- src/tests/sourcepos.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index ab8483f6..52ae348c 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -633,7 +633,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos += 1; } let inl = if nlpos > 1 && self.input[nlpos - 1] == b' ' && self.input[nlpos - 2] == b' ' { - self.make_inline(NodeValue::LineBreak, nlpos, self.pos - 1) + self.make_inline(NodeValue::LineBreak, nlpos - 2, self.pos - 1) } else { self.make_inline(NodeValue::SoftBreak, nlpos, self.pos - 1) }; diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 8f138c3a..06b919fc 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -418,7 +418,6 @@ fn node_values() -> HashMap { | DescriptionItem // end is 4:0 | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 - | LineBreak // start is 1:15 but should be 1:13 | ThematicBreak // end is 4:0 | Link // inconsistent between link types | Math // is 3:2-3:6 but should be 3:1-3:7 From 
f8abcacf0bc1b8518e75ee0eddf810500b8e8dfb Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 05/30] tests: test other LineBreak sourcepos. --- src/tests/sourcepos.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 06b919fc..4b703af8 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -266,7 +266,10 @@ hello world ); const SOFT_BREAK: TestCase = (&[sourcepos!((1:13-1:13))], "stuff before\nstuff after"); -const LINE_BREAK: TestCase = (&[sourcepos!((1:13-1:15))], "stuff before \nstuff after"); +const LINE_BREAK: TestCase = ( + &[sourcepos!((1:13-1:15)), sourcepos!((4:13-4:14))], + "stuff before \nstuff after\n\nstuff before\\\nstuff after\n", +); const CODE: TestCase = (&[sourcepos!((1:7-1:13))], "hello `world`"); From 447443bb60bb76ec1ffda5ef2d6352e9d2706a42 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 06/30] inlines: fix basic autolinks. (Tests failing other links.) 
--- src/parser/inlines.rs | 4 ++-- src/tests/sourcepos.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 52ae348c..1016e74a 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1990,8 +1990,8 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { url: String::from_utf8(strings::clean_autolink(url, kind)).unwrap(), title: String::new(), }), - start_column + 1, - end_column + 1, + start_column, + end_column, ); inl.append(self.make_inline( NodeValue::Text(String::from_utf8(entity::unescape_html(url)).unwrap()), diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 4b703af8..4811f4f2 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -422,7 +422,6 @@ fn node_values() -> HashMap { | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 | ThematicBreak // end is 4:0 - | Link // inconsistent between link types | Math // is 3:2-3:6 but should be 3:1-3:7 | Raw // unparseable ) From 68b2b9ae2d5d9b36c7115cbce7ad4ff43e8db4f0 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 07/30] inlines: fix sourcepos for colon/w autolinks; add tests (email still broken). 
--- src/parser/inlines.rs | 30 +++++++++++++++++++++++++----- src/tests/sourcepos.rs | 4 ++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 1016e74a..e3dc237b 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1304,18 +1304,19 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { if !self.options.parse.relaxed_autolinks && self.within_brackets { return None; } - let (post, mut reverse, skip) = f( + let startpos = self.pos; + let (post, need_reverse, skip) = f( self.arena, self.input, self.pos, self.options.parse.relaxed_autolinks, )?; - self.pos += skip - reverse; + self.pos += skip - need_reverse; - // We need to "rewind" by `reverse` chars, which should be in one or - // more Text nodes beforehand. Typically the chars will *all* be in a - // single Text node, containing whatever text came before the ":" that + // We need to "rewind" by `need_reverse` chars, which should be in one + // or more Text nodes beforehand. Typically the chars will *all* be in + // a single Text node, containing whatever text came before the ":" that // triggered this method, eg. "See our website at http" ("://blah.com"). // // relaxed_autolinks allows some slightly pathological cases. First, @@ -1323,6 +1324,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { // a scheme including the letter "w", which will split Text inlines due // to them being their own trigger (for handle_autolink_w), meaning // "wa://…" will need to traverse two Texts to complete the rewind. + let mut reverse = need_reverse; while reverse > 0 { match node.last_child().unwrap().data.borrow_mut().value { NodeValue::Text(ref mut prev) => { @@ -1338,6 +1340,24 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } + { + let sp = &mut post.data.borrow_mut().sourcepos; + // See [`make_inline`]. 
+ sp.start = ( + self.line, + (startpos as isize - need_reverse as isize + + 1 + + self.column_offset + + self.line_offset as isize) as usize, + ) + .into(); + sp.end = ( + self.line, + (self.pos as isize + self.column_offset + self.line_offset as isize) as usize, + ) + .into(); + } + Some(post) } diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 4811f4f2..896df75f 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -305,12 +305,16 @@ const LINK: TestCase = ( sourcepos!((3:7-3:11)), sourcepos!((4:7-4:16)), sourcepos!((5:7-5:29)), + sourcepos!((6:7-6:21)), + sourcepos!((7:7-7:21)), ], r#"hello world hello [foo](https://example.com) world hello [foo] world hello [bar][bar] world hello https://example.com/foo world +hello www.example.com world +hello foo@example.com world [foo]: https://example.com [bar]: https://example.com"#, From 19bcb5eea694da49554f07a0caf10c003a3cc031 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 08/30] inlines: more autolink fixes (still no email). Need to set the inner text sourcepos, as well as propagate any rewind adjustments to previous nodes' sourcepos. --- src/parser/autolink.rs | 3 ++- src/parser/inlines.rs | 8 +++++++- src/tests/autolink.rs | 25 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 456f9293..f86f6c83 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -7,7 +7,8 @@ use typed_arena::Arena; use unicode_categories::UnicodeCategories; // TODO: this can probably be cleaned up a lot. It used to handle all three of -// {url,www,email}_match, but now just the last of those. +// {url,www,email}_match, but now just the last of those. (This is still per +// upstream cmark-gfm, so it's not easily changed without breaking compat.) 
pub(crate) fn process_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index e3dc237b..116ccf05 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1326,10 +1326,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { // "wa://…" will need to traverse two Texts to complete the rewind. let mut reverse = need_reverse; while reverse > 0 { - match node.last_child().unwrap().data.borrow_mut().value { + let mut last_child = node.last_child().unwrap().data.borrow_mut(); + match last_child.value { NodeValue::Text(ref mut prev) => { if reverse < prev.len() { prev.truncate(prev.len() - reverse); + last_child.sourcepos.end.column -= reverse; reverse = 0; } else { reverse -= prev.len(); @@ -1356,6 +1358,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { (self.pos as isize + self.column_offset + self.line_offset as isize) as usize, ) .into(); + + // Inner text node gets the same sp, since there are no surrounding + // characters for autolinks of these kind. + post.first_child().unwrap().data.borrow_mut().sourcepos = *sp; } Some(post) diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index c4363a14..1e7a89a2 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -395,3 +395,28 @@ fn autolink_fuzz_we() { no_roundtrip, ); } + +#[test] +fn autolink_sourcepos() { + assert_ast_match!( + [extension.autolink], + "a www.com\n" + "\n" + "b https://www.com\n" + , + (document (1:1-3:18) [ + (paragraph (1:1-1:10) [ + (text (1:1-1:3) "a ") + (link (1:4-1:10) [ + (text (1:4-1:10) "www.com") + ]) + ]) + (paragraph (3:1-3:18) [ + (text (3:1-3:3) "b ") + (link (3:4-3:18) [ + (text (3:4-3:18) "https://www.com") + ]) + ]) + ]) + ); +} From ffd8159db68b35021fd9ef6f8d314a976ee4d3b4 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 09/30] autolink: fix a bunch of sourcepos! 
--- src/parser/autolink.rs | 258 +++++++++++++++++++++++------------ src/parser/mod.rs | 1 + src/tests/autolink.rs | 35 ++++-- 3 files changed, 166 insertions(+), 128 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index f86f6c83..e9c8369b 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -1,6 +1,6 @@ use crate::character_set::character_set; use crate::ctype::{isalnum, isalpha, isspace}; -use crate::nodes::{AstNode, NodeLink, NodeValue}; +use crate::nodes::{AstNode, NodeLink, NodeValue, Sourcepos}; use crate::parser::inlines::make_inline; use std::str; use typed_arena::Arena; @@ -14,6 +14,7 @@ pub(crate) fn process_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, contents_str: &mut String, relaxed_autolinks: bool, + sourcepos: &mut Sourcepos, ) { let contents = contents_str.as_bytes(); let len = contents.len(); @@ -60,15 +61,154 @@ pub(crate) fn process_autolinks<'a>( post.insert_after(make_inline( arena, NodeValue::Text(remain.to_string()), - (0, 1, 0, 1).into(), + ( + sourcepos.end.line, + sourcepos.end.column + 1 - (len - i - skip), + sourcepos.end.line, + sourcepos.end.column, + ) + .into(), )); } + + // Enormous HACK. This compensates for our lack of accounting for + // smart punctuation in sourcepos in exactly one extant regression + // test (tests::fuzz::trailing_hyphen_matches), but it is no + // substitute, probably fails miserably in other contexts, and we + // really instead just need an actual solution to that issue. + sourcepos.end.column -= str::from_utf8(&contents[i..]).unwrap().chars().count(); + contents_str.truncate(i); + let nsp: Sourcepos = ( + sourcepos.end.line, + sourcepos.end.column + 1, + sourcepos.end.line, + sourcepos.end.column + skip, + ) + .into(); + post.data.borrow_mut().sourcepos = nsp; + // Inner text gets same sourcepos as link, since there's nothing but + // the text.
+ post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; return; } } } +fn email_match<'a>( + arena: &'a Arena>, + contents: &[u8], + i: usize, + relaxed_autolinks: bool, +) -> Option<(&'a AstNode<'a>, usize, usize)> { + const EMAIL_OK_SET: [bool; 256] = character_set!(b".+-_"); + + let size = contents.len(); + + let mut auto_mailto = true; + let mut is_xmpp = false; + let mut rewind = 0; + + while rewind < i { + let c = contents[i - rewind - 1]; + + if isalnum(c) || EMAIL_OK_SET[c as usize] { + rewind += 1; + continue; + } + + if c == b':' { + if validate_protocol("mailto", contents, i - rewind - 1) { + auto_mailto = false; + rewind += 1; + continue; + } + + if validate_protocol("xmpp", contents, i - rewind - 1) { + is_xmpp = true; + auto_mailto = false; + rewind += 1; + continue; + } + } + + break; + } + + if rewind == 0 { + return None; + } + + let mut link_end = 1; + let mut np = 0; + + while link_end < size - i { + let c = contents[i + link_end]; + + if isalnum(c) { + // empty + } else if c == b'@' { + return None; + } else if c == b'.' 
&& link_end < size - i - 1 && isalnum(contents[i + link_end + 1]) { + np += 1; + } else if c == b'/' && is_xmpp { + // xmpp allows a `/` in the url + } else if c != b'-' && c != b'_' { + break; + } + + link_end += 1; + } + + if link_end < 2 + || np == 0 + || (!isalpha(contents[i + link_end - 1]) && contents[i + link_end - 1] != b'.') + { + return None; + } + + link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); + if link_end == 0 { + return None; + } + + let mut url = if auto_mailto { + "mailto:".to_string() + } else { + "".to_string() + }; + let text = str::from_utf8(&contents[i - rewind..link_end + i]).unwrap(); + url.push_str(text); + + let inl = make_inline( + arena, + NodeValue::Link(NodeLink { + url, + title: String::new(), + }), + (0, 1, 0, 1).into(), + ); + + inl.append(make_inline( + arena, + NodeValue::Text(text.to_string()), + (0, 1, 0, 1).into(), + )); + Some((inl, rewind, rewind + link_end)) +} + +fn validate_protocol(protocol: &str, contents: &[u8], cursor: usize) -> bool { + let size = contents.len(); + let mut rewind = 0; + + while rewind < cursor && isalpha(contents[cursor - rewind - 1]) { + rewind += 1; + } + + size - cursor + rewind >= protocol.len() + && &contents[cursor - rewind..cursor] == protocol.as_bytes() +} + pub fn www_match<'a>( arena: &'a Arena>, contents: &[u8], @@ -293,117 +433,3 @@ pub fn url_match<'a>( )); Some((inl, rewind, rewind + link_end)) } - -fn email_match<'a>( - arena: &'a Arena>, - contents: &[u8], - i: usize, - relaxed_autolinks: bool, -) -> Option<(&'a AstNode<'a>, usize, usize)> { - const EMAIL_OK_SET: [bool; 256] = character_set!(b".+-_"); - - let size = contents.len(); - - let mut auto_mailto = true; - let mut is_xmpp = false; - let mut rewind = 0; - - while rewind < i { - let c = contents[i - rewind - 1]; - - if isalnum(c) || EMAIL_OK_SET[c as usize] { - rewind += 1; - continue; - } - - if c == b':' { - if validate_protocol("mailto", contents, i - rewind - 1) { - auto_mailto = false; - rewind += 
1; - continue; - } - - if validate_protocol("xmpp", contents, i - rewind - 1) { - is_xmpp = true; - auto_mailto = false; - rewind += 1; - continue; - } - } - - break; - } - - if rewind == 0 { - return None; - } - - let mut link_end = 1; - let mut np = 0; - - while link_end < size - i { - let c = contents[i + link_end]; - - if isalnum(c) { - // empty - } else if c == b'@' { - return None; - } else if c == b'.' && link_end < size - i - 1 && isalnum(contents[i + link_end + 1]) { - np += 1; - } else if c == b'/' && is_xmpp { - // xmpp allows a `/` in the url - } else if c != b'-' && c != b'_' { - break; - } - - link_end += 1; - } - - if link_end < 2 - || np == 0 - || (!isalpha(contents[i + link_end - 1]) && contents[i + link_end - 1] != b'.') - { - return None; - } - - link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); - if link_end == 0 { - return None; - } - - let mut url = if auto_mailto { - "mailto:".to_string() - } else { - "".to_string() - }; - let text = str::from_utf8(&contents[i - rewind..link_end + i]).unwrap(); - url.push_str(text); - - let inl = make_inline( - arena, - NodeValue::Link(NodeLink { - url, - title: String::new(), - }), - (0, 1, 0, 1).into(), - ); - - inl.append(make_inline( - arena, - NodeValue::Text(text.to_string()), - (0, 1, 0, 1).into(), - )); - Some((inl, rewind, rewind + link_end)) -} - -fn validate_protocol(protocol: &str, contents: &[u8], cursor: usize) -> bool { - let size = contents.len(); - let mut rewind = 0; - - while rewind < cursor && isalpha(contents[cursor - rewind - 1]) { - rewind += 1; - } - - size - cursor + rewind >= protocol.len() - && &contents[cursor - rewind..cursor] == protocol.as_bytes() -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f2bb7ace..b27654ed 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3014,6 +3014,7 @@ where node, text, self.options.parse.relaxed_autolinks, + sourcepos, ); } } diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index 1e7a89a2..36f524b8 
100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -259,9 +259,9 @@ fn sourcepos_correctly_restores_context() { // wasn't equal. That probably wouldn't happen, though -- i.e. we're // never autolinking into the middle of a rendered smart punctuation. // - // For now the desired sourcepos is documented in comment. What we - // have currently (after backing out the adjustments, having hit the - // above case) matches cmark-gfm. + // The below documents test cases that used to break; I suspect smart + // punctuation is still a breaking case however! + assert_ast_match!( [], "ab _cde_ f@g.ee h*ijklm* n", @@ -289,11 +289,11 @@ fn sourcepos_correctly_restores_context() { (emph (1:4-1:8) [ (text (1:5-1:7) "cde") ]) - (text (1:9-1:17) " ") // (text (1:9-1:9) " ") - (link (XXX) [ // (link (1:10-1:15) [ - (text (XXX) "f@g.ee") // (text (1:10-1:15) "f@g.ee") + (text (1:9-1:9) " ") + (link (1:10-1:15) [ + (text (1:10-1:15) "f@g.ee") ]) - (text (XXX) " h") // (text (1:16-1:17) " h") + (text (1:16-1:17) " h") (emph (1:18-1:24) [ (text (1:19-1:23) "ijklm") ]) @@ -400,22 +400,33 @@ fn autolink_fuzz_we() { fn autolink_sourcepos() { assert_ast_match!( [extension.autolink], - "a www.com\n" + "a www.com x\n" + "\n" + "b https://www.com y\n" "\n" - "b https://www.com\n" + "c foo@www.com z\n" , - (document (1:1-3:18) [ - (paragraph (1:1-1:10) [ + (document (1:1-5:17) [ + (paragraph (1:1-1:13) [ (text (1:1-1:3) "a ") (link (1:4-1:10) [ (text (1:4-1:10) "www.com") ]) + (text (1:11-1:13) " x") ]) - (paragraph (3:1-3:18) [ + (paragraph (3:1-3:21) [ (text (3:1-3:3) "b ") (link (3:4-3:18) [ (text (3:4-3:18) "https://www.com") ]) + (text (3:19-3:21) " y") + ]) + (paragraph (5:1-5:17) [ + (text (5:1-5:3) "c ") + (link (5:4-5:14) [ + (text (5:4-5:14) "foo@www.com") + ]) + (text (5:15-5:17) " z") ]) ]) ); From 220a902b673a345646f03c97b69d7b1b5e839772 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 10/30] parser: fix ThematicBreak 
sourcepos end. --- src/parser/mod.rs | 6 ++++++ src/tests/sourcepos.rs | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b27654ed..1b724866 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1851,6 +1851,7 @@ where *container = self.add_child(container, NodeValue::ThematicBreak, self.first_nonspace + 1); let adv = line.len() - 1 - self.offset; + container.data.borrow_mut().sourcepos.end = (self.line_number, adv).into(); self.advance_offset(line, adv, false); true @@ -2708,6 +2709,11 @@ where _ => false, } { ast.sourcepos.end = (self.line_number, self.curline_end_col).into(); + } else if match ast.value { + NodeValue::ThematicBreak => true, + _ => false, + } { + // sourcepos.end set during opening. } else { ast.sourcepos.end = (self.line_number - 1, self.last_line_length).into(); } diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 896df75f..76fc0628 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -161,10 +161,10 @@ hello world ); const THEMATIC_BREAK: TestCase = ( - &[sourcepos!((3:1-3:3))], + &[sourcepos!((3:2-3:4))], r#"Hello ---- + --- World"#, ); @@ -425,7 +425,6 @@ fn node_values() -> HashMap { | DescriptionItem // end is 4:0 | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 - | ThematicBreak // end is 4:0 | Math // is 3:2-3:6 but should be 3:1-3:7 | Raw // unparseable ) From b92a2ebb55751df11318ab3aa0116bfd2e204411 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Wed, 26 Feb 2025 13:44:24 +1100 Subject: [PATCH 11/30] tests: document list/item sourcepos issues. This one will be a bit of a pickle, and entails differing from cmark-gfm (which currently gives these "-4:0" sourcepos ends for the list and last item on the same test). 
--- src/tests/regressions.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/tests/regressions.rs b/src/tests/regressions.rs index a2456aac..8d54a217 100644 --- a/src/tests/regressions.rs +++ b/src/tests/regressions.rs @@ -146,3 +146,34 @@ fn sourcepos_lone_backtick() { ]) ); } + +#[ignore] // This one will require a bit of thinking. +#[test] +fn sourcepos_link_items() { + assert_ast_match!( + [], + "- ab\n" + "- cdef\n" + "\n" + "\n" + "g\n" + , + (document (1:1-5:1) [ + (list (1:1-2:6) [ + (item (1:1-1:4) [ + (paragraph (1:3-1:4) [ + (text (1:3-1:4) "ab") + ]) + ]) + (item (2:1-2:6) [ + (paragraph (2:3-2:6) [ + (text (2:3-2:6) "cdef") + ]) + ]) + ]) + (paragraph (5:1-5:1) [ + (text (5:1-5:1) "g") + ]) + ]) + ); +} From 30abc1c3b66879326d752457cfa5a5b3ee751755 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 13:06:21 +1100 Subject: [PATCH 12/30] inlines: fix Math sourcepos. Test some more content literals, too. --- src/parser/inlines.rs | 159 ++++++++++++++++++----------------------- src/tests.rs | 18 +++-- src/tests/math.rs | 11 ++- src/tests/sourcepos.rs | 1 - 4 files changed, 86 insertions(+), 103 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 116ccf05..b465100d 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -721,8 +721,8 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn scan_to_closing_dollar(&mut self, opendollarlength: usize) -> Option { - if !(self.options.extension.math_dollars) || opendollarlength > MAX_MATH_DOLLARS { + fn scan_to_closing_dollar(&mut self, opendollarlength: usize) -> Option { + if !self.options.extension.math_dollars || opendollarlength > MAX_MATH_DOLLARS { return None; } @@ -740,17 +740,15 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { return None; } + let c = self.input[self.pos - 1]; + // space not allowed before ending $ - if opendollarlength == 1 { - let c = self.input[self.pos - 
1]; - if isspace(c) { - return None; - } + if opendollarlength == 1 && isspace(c) { + return None; } // dollar signs must also be backslash-escaped if they occur within math - let c = self.input[self.pos - 1]; - if opendollarlength == 1 && c == (b'\\') { + if opendollarlength == 1 && c == b'\\' { self.pos += 1; continue; } @@ -768,10 +766,8 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn scan_to_closing_code_dollar(&mut self) -> Option { - if !self.options.extension.math_code { - return None; - } + fn scan_to_closing_code_dollar(&mut self) -> Option { + assert!(self.options.extension.math_code); loop { while self.peek_char().map_or(false, |&c| c != b'$') { @@ -783,96 +779,77 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } let c = self.input[self.pos - 1]; + self.pos += 1; if c == b'`' { - self.pos += 1; return Some(self.pos); - } else { - self.pos += 1; } } } // Heuristics used from https://pandoc.org/MANUAL.html#extension-tex_math_dollars - pub fn handle_dollars(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { - if self.options.extension.math_dollars || self.options.extension.math_code { - let opendollars = self.take_while(b'$'); - let mut code_math = false; - - // check for code math - if opendollars == 1 - && self.options.extension.math_code - && self.peek_char().map_or(false, |&c| c == b'`') - { - code_math = true; - self.pos += 1; - } + fn handle_dollars(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { + if !(self.options.extension.math_dollars || self.options.extension.math_code) { + self.pos += 1; + return self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1); + } + let startpos = self.pos; + let opendollars = self.take_while(b'$'); + let mut code_math = false; - let startpos = self.pos; - let endpos: Option = if code_math { - self.scan_to_closing_code_dollar() - } else { - self.scan_to_closing_dollar(opendollars) - }; + // check for code math + if 
opendollars == 1 + && self.options.extension.math_code + && self.peek_char().map_or(false, |&c| c == b'`') + { + code_math = true; + self.pos += 1; + } + let fence_length = if code_math { 2 } else { opendollars }; - let fence_length = if code_math { 2 } else { opendollars }; - let endpos: Option = match endpos { - Some(epos) => { - if epos - startpos + fence_length < fence_length * 2 + 1 { - None - } else { - endpos - } - } - None => endpos, - }; + let endpos: Option = if code_math { + self.scan_to_closing_code_dollar() + } else { + self.scan_to_closing_dollar(opendollars) + } + .filter(|epos| epos - startpos >= fence_length * 2 + 1); - match endpos { - None => { - if code_math { - self.pos = startpos - 1; - self.make_inline( - NodeValue::Text("$".to_string()), - self.pos - 1, - self.pos - 1, - ) - } else { - self.pos = startpos; - self.make_inline( - NodeValue::Text("$".repeat(opendollars)), - self.pos, - self.pos, - ) - } - } - Some(endpos) => { - let buf = &self.input[startpos..endpos - fence_length]; - let buf: Vec = if code_math || opendollars == 1 { - strings::normalize_code(buf) - } else { - buf.to_vec() - }; - let math = NodeMath { - dollar_math: !code_math, - display_math: opendollars == 2, - literal: String::from_utf8(buf).unwrap(), - }; - let node = self.make_inline( - NodeValue::Math(math), - startpos, - endpos - fence_length - 1, - ); - self.adjust_node_newlines( - node, - endpos - startpos, - fence_length, - parent_line_offsets, - ); - node + eprintln!("** startpos is {:?}, endpos is {:?}", startpos, endpos); + + match endpos { + None => { + if code_math { + self.pos = startpos + 1; + self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1) + } else { + self.pos = startpos + fence_length; + self.make_inline( + NodeValue::Text("$".repeat(opendollars)), + self.pos - fence_length, + self.pos - 1, + ) } } - } else { - self.pos += 1; - self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1) + Some(endpos) => { + 
let buf = &self.input[startpos + fence_length..endpos - fence_length]; + let buf: Vec = if code_math || opendollars == 1 { + strings::normalize_code(buf) + } else { + buf.to_vec() + }; + let math = NodeMath { + dollar_math: !code_math, + display_math: opendollars == 2, + literal: String::from_utf8(buf).unwrap(), + }; + let node = self.make_inline(NodeValue::Math(math), startpos, endpos - 1); + self.adjust_node_newlines( + node, + endpos - startpos - fence_length, + fence_length, + parent_line_offsets, + ); + node + } } } diff --git a/src/tests.rs b/src/tests.rs index ea66f974..0c4cd950 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -387,11 +387,19 @@ impl AstMatchTree { node.children().count(), "text node should have no children" ); - assert_eq!( - text, - ast.value.text().unwrap(), - "text node content should match" - ); + match ast.value { + NodeValue::Math(ref nm) => { + assert_eq!(text, &nm.literal, "math node content should match"); + } + NodeValue::CodeBlock(ref ncb) => { + assert_eq!(text, &ncb.literal, "code block node content should match") + } + _ => assert_eq!( + text, + ast.value.text().unwrap(), + "text node content should match" + ), + } } AstMatchContent::Children(children) => { assert_eq!( diff --git a/src/tests/math.rs b/src/tests/math.rs index 5920c50d..b0a0b54d 100644 --- a/src/tests/math.rs +++ b/src/tests/math.rs @@ -134,7 +134,6 @@ fn math_unrecognized_syntax_non_roundtrip(markdown: &str, html: &str) { } #[test] -#[ignore] fn sourcepos() { assert_ast_match!( [extension.math_dollars, extension.math_code], @@ -149,16 +148,16 @@ fn sourcepos() { "```\n", (document (1:1-9:3) [ (paragraph (1:1-1:29) [ - (math (1:1-1:5)) + (math (1:1-1:5) "x^2") (text (1:6-1:10) " and ") - (math (1:11-1:17)) + (math (1:11-1:17) "y^2") (text (1:18-1:22) " and ") - (math (1:23-1:29)) + (math (1:23-1:29) "z^2") ]) (paragraph (3:1-5:2) [ - (math (3:1-5:2)) + (math (3:1-5:2) "\na^2\n") ]) - (code_block (7:1-9:3)) + (code_block (7:1-9:3) "b^2\n") ]) ); } diff --git 
a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 76fc0628..19aeb619 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -425,7 +425,6 @@ fn node_values() -> HashMap { | DescriptionItem // end is 4:0 | DescriptionTerm // end is 3:0 | DescriptionDetails // end is 4:0 - | Math // is 3:2-3:6 but should be 3:1-3:7 | Raw // unparseable ) }) From 31b07ffcf1c9b8477c2260d6f1847716cdcdcc83 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 13:06:21 +1100 Subject: [PATCH 13/30] inlines: mark some funs as not pub. --- src/parser/inlines.rs | 54 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index b465100d..25df673c 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -566,7 +566,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } #[inline] - pub fn eof(&self) -> bool { + fn eof(&self) -> bool { self.pos >= self.input.len() } @@ -586,7 +586,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn find_special_char(&self) -> usize { + fn find_special_char(&self) -> usize { for n in self.pos..self.input.len() { if self.special_chars[self.input[n] as usize] { if self.input[n] == b'^' && self.within_brackets { @@ -624,7 +624,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn handle_newline(&mut self) -> &'a AstNode<'a> { + fn handle_newline(&mut self) -> &'a AstNode<'a> { let nlpos = self.pos; if self.input[self.pos] == b'\r' { self.pos += 1; @@ -643,7 +643,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { inl } - pub fn take_while(&mut self, c: u8) -> usize { + fn take_while(&mut self, c: u8) -> usize { let start_pos = self.pos; while self.peek_char() == Some(&c) { self.pos += 1; @@ -651,7 +651,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos - start_pos } - pub fn take_while_with_limit(&mut self, c: u8, 
limit: usize) -> usize { + fn take_while_with_limit(&mut self, c: u8, limit: usize) -> usize { let start_pos = self.pos; let mut count = 0; while count < limit && self.peek_char() == Some(&c) { @@ -661,7 +661,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos - start_pos } - pub fn scan_to_closing_backtick(&mut self, openticklength: usize) -> Option { + fn scan_to_closing_backtick(&mut self, openticklength: usize) -> Option { if openticklength > MAXBACKTICKS { return None; } @@ -688,7 +688,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn handle_backticks(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { + fn handle_backticks(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { let startpos = self.pos; let openticks = self.take_while(b'`'); let endpos = self.scan_to_closing_backtick(openticks); @@ -813,8 +813,6 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } .filter(|epos| epos - startpos >= fence_length * 2 + 1); - eprintln!("** startpos is {:?}, endpos is {:?}", startpos, endpos); - match endpos { None => { if code_math { @@ -862,7 +860,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { skipped } - pub fn handle_delim(&mut self, c: u8) -> &'a AstNode<'a> { + fn handle_delim(&mut self, c: u8) -> &'a AstNode<'a> { let (numdelims, can_open, can_close) = self.scan_delims(c); let contents = if c == b'\'' && self.options.parse.smart { @@ -891,7 +889,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { inl } - pub fn handle_hyphen(&mut self) -> &'a AstNode<'a> { + fn handle_hyphen(&mut self) -> &'a AstNode<'a> { let start = self.pos; self.pos += 1; @@ -924,7 +922,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.make_inline(NodeValue::Text(buf), start, self.pos - 1) } - pub fn handle_period(&mut self) -> &'a AstNode<'a> { + fn handle_period(&mut self) -> &'a AstNode<'a> { self.pos += 1; if self.options.parse.smart && 
self.peek_char().map_or(false, |&c| c == b'.') { self.pos += 1; @@ -943,7 +941,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn scan_delims(&mut self, c: u8) -> (usize, bool, bool) { + fn scan_delims(&mut self, c: u8) -> (usize, bool, bool) { let before_char = if self.pos == 0 { '\n' } else { @@ -1037,7 +1035,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn push_delimiter(&mut self, c: u8, can_open: bool, can_close: bool, inl: &'a AstNode<'a>) { + fn push_delimiter(&mut self, c: u8, can_open: bool, can_close: bool, inl: &'a AstNode<'a>) { let d = self.delimiter_arena.alloc(Delimiter { prev: Cell::new(self.last_delimiter), next: Cell::new(None), @@ -1059,7 +1057,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { // // As a side-effect, handle long "***" and "___" nodes by truncating them in // place to be re-matched by `process_emphasis`. - pub fn insert_emph( + fn insert_emph( &mut self, opener: &'d Delimiter<'a, 'd>, closer: &'d Delimiter<'a, 'd>, @@ -1188,7 +1186,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn handle_backslash(&mut self) -> &'a AstNode<'a> { + fn handle_backslash(&mut self) -> &'a AstNode<'a> { let startpos = self.pos; self.pos += 1; @@ -1231,7 +1229,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos > old_pos || self.eof() } - pub fn handle_entity(&mut self) -> &'a AstNode<'a> { + fn handle_entity(&mut self) -> &'a AstNode<'a> { self.pos += 1; match entity::unescape(&self.input[self.pos..]) { @@ -1248,7 +1246,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } #[cfg(feature = "shortcodes")] - pub fn handle_shortcodes_colon(&mut self) -> Option<&'a AstNode<'a>> { + fn handle_shortcodes_colon(&mut self) -> Option<&'a AstNode<'a>> { let matchlen = scanners::shortcode(&self.input[self.pos + 1..])?; let shortcode = unsafe { @@ -1265,11 +1263,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 
'd, 'i, 'c> { )) } - pub fn handle_autolink_with( - &mut self, - node: &'a AstNode<'a>, - f: F, - ) -> Option<&'a AstNode<'a>> + fn handle_autolink_with(&mut self, node: &'a AstNode<'a>, f: F) -> Option<&'a AstNode<'a>> where F: Fn( &'a Arena>, @@ -1344,15 +1338,15 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { Some(post) } - pub fn handle_autolink_colon(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { + fn handle_autolink_colon(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { self.handle_autolink_with(node, autolink::url_match) } - pub fn handle_autolink_w(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { + fn handle_autolink_w(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { self.handle_autolink_with(node, autolink::www_match) } - pub fn handle_pointy_brace(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { + fn handle_pointy_brace(&mut self, parent_line_offsets: &[usize]) -> &'a AstNode<'a> { self.pos += 1; if let Some(matchlen) = scanners::autolink_uri(&self.input[self.pos..]) { @@ -1453,7 +1447,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.make_inline(NodeValue::Text("<".to_string()), self.pos - 1, self.pos - 1) } - pub fn push_bracket(&mut self, image: bool, inl_text: &'a AstNode<'a>) { + fn push_bracket(&mut self, image: bool, inl_text: &'a AstNode<'a>) { let len = self.brackets.len(); if len > 0 { self.brackets[len - 1].bracket_after = true; @@ -1469,7 +1463,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } } - pub fn handle_close_bracket(&mut self) -> Option<&'a AstNode<'a>> { + fn handle_close_bracket(&mut self) -> Option<&'a AstNode<'a>> { self.pos += 1; let initial_pos = self.pos; @@ -1690,7 +1684,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { Some(self.make_inline(NodeValue::Text("]".to_string()), self.pos - 1, self.pos - 1)) } - pub fn close_bracket_match(&mut self, is_image: bool, url: String, 
title: String) { + fn close_bracket_match(&mut self, is_image: bool, url: String, title: String) { let brackets_len = self.brackets.len(); let nl = NodeLink { url, title }; @@ -1771,7 +1765,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { // Handles wikilink syntax // [[link text|url]] // [[url|link text]] - pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { + fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { let startpos = self.pos; let component = self.wikilink_url_link_label()?; let url_clean = strings::clean_url(component.url); From ee42217a7d9cf6ac989c542331936e8d18cc10f5 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 14:03:20 +1100 Subject: [PATCH 14/30] tests: elaborate on a bunch of AST tests. Also test text content of links, images, frontmatter, and support testing both text and children (since many nodes have both). --- src/parser/autolink.rs | 27 +++++++---- src/tests.rs | 94 ++++++++++++++++++++++++++------------- src/tests/autolink.rs | 8 ++-- src/tests/core.rs | 16 +++---- src/tests/front_matter.rs | 78 +++++++++++++++----------------- src/tests/fuzz.rs | 29 +++++++----- 6 files changed, 149 insertions(+), 103 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index e9c8369b..b9bdd346 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -55,7 +55,23 @@ pub(crate) fn process_autolinks<'a>( if let Some((post, reverse, skip)) = post_org { i -= reverse; node.insert_after(post); - if i + skip < len { + + // Enormous HACK. This compensates for our lack of accounting for + // smart puncutation in sourcepos in exactly one extant regression + // test (tests::fuzz::trailing_hyphen_matches) and one fuzzer + // crash observed so far, but it is no substitute, probably fails + // miserably in other contexts, and we really instead just need an + // actual solution to that issue. 
+ // + // Problems as follows: + // * We almost certainly need to measure `skip` the same way. + // * We're counting extremely arbitrary Unicode characters, which + // don't _necessarily_ map 1:1 with the transformation done by + // smart punctuation. + // * I've briefly forgotten _how_ this actually fixes anything. + let len_less_i = str::from_utf8(&contents[i..]).unwrap().chars().count(); + + if skip < len_less_i { let remain = str::from_utf8(&contents[i + skip..]).unwrap(); assert!(!remain.is_empty()); post.insert_after(make_inline( @@ -63,7 +79,7 @@ pub(crate) fn process_autolinks<'a>( NodeValue::Text(remain.to_string()), ( sourcepos.end.line, - sourcepos.end.column + 1 - (len - i - skip), + sourcepos.end.column + 1 - (len_less_i - skip), sourcepos.end.line, sourcepos.end.column, ) @@ -71,12 +87,7 @@ pub(crate) fn process_autolinks<'a>( )); } - // Enormous HACK. This compensates for our lack of accounting for - // smart puncutation in sourcepos in exactly one extant regression - // test (tests::fuzz::trailing_hyphen_matches), but it is no - // substitute, probably fails miserably in other contexts, and we - // really instead just need an actual solution to that issue. - sourcepos.end.column -= str::from_utf8(&contents[i..]).unwrap().chars().count(); + sourcepos.end.column -= len_less_i; contents_str.truncate(i); let nsp: Sourcepos = ( diff --git a/src/tests.rs b/src/tests.rs index 0c4cd950..2822e80d 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -289,17 +289,16 @@ macro_rules! sourcepos { pub(crate) use sourcepos; macro_rules! ast { - (($name:tt $sp:tt)) => { - ast!(($name $sp [])) - }; - (($name:tt $sp:tt $content:tt)) => { + (($name:tt $sp:tt $( $content:tt )*)) => { AstMatchTree { name: stringify!($name).to_string(), sourcepos: sourcepos!($sp), - content: ast!($content), + matches: vec![ $( ast_content!($content), )* ], } }; +} +macro_rules! 
ast_content { ($text:literal) => {AstMatchContent::Text($text.to_string())}; ([ $( $children:tt )* ]) => { AstMatchContent::Children(vec![ $( ast!($children), )* ]) @@ -307,6 +306,7 @@ macro_rules! ast { } pub(crate) use ast; +pub(crate) use ast_content; #[track_caller] fn assert_ast_match_i(md: &str, amt: AstMatchTree, opts: F) @@ -365,7 +365,7 @@ pub(crate) use assert_ast_match; struct AstMatchTree { name: String, sourcepos: Sourcepos, - content: AstMatchContent, + matches: Vec, } enum AstMatchContent { @@ -380,37 +380,71 @@ impl AstMatchTree { assert_eq!(self.name, ast.value.xml_node_name(), "node type matches"); assert_eq!(self.sourcepos, ast.sourcepos, "sourcepos are equal"); - match &self.content { - AstMatchContent::Text(text) => { - assert_eq!( - 0, - node.children().count(), - "text node should have no children" - ); - match ast.value { + let mut asserted_text = false; + let mut asserted_children = false; + + for m in &self.matches { + match m { + AstMatchContent::Text(text) => match ast.value { NodeValue::Math(ref nm) => { - assert_eq!(text, &nm.literal, "math node content should match"); + assert_eq!(text, &nm.literal, "Math literal should match"); + asserted_text = true; } NodeValue::CodeBlock(ref ncb) => { - assert_eq!(text, &ncb.literal, "code block node content should match") + assert_eq!(text, &ncb.literal, "CodeBlock literal should match"); + asserted_text = true; + } + NodeValue::Text(ref nt) => { + assert_eq!(text, nt, "Text content should match"); + asserted_text = true; + } + NodeValue::Link(ref nl) => { + assert_eq!(text, &nl.url, "Link destination should match"); + asserted_text = true; + } + NodeValue::Image(ref ni) => { + assert_eq!(text, &ni.url, "Image source should match"); + asserted_text = true; + } + NodeValue::FrontMatter(ref nfm) => { + assert_eq!(text, nfm, "Front matter content should match"); + asserted_text = true; } - _ => assert_eq!( - text, - ast.value.text().unwrap(), - "text node content should match" + _ => panic!( + "no 
text content matcher for this node type: {:?}", + ast.value ), - } - } - AstMatchContent::Children(children) => { - assert_eq!( - children.len(), - node.children().count(), - "children count should match" - ); - for (e, a) in children.iter().zip(node.children()) { - e.assert_match(a); + }, + AstMatchContent::Children(children) => { + assert_eq!( + children.len(), + node.children().count(), + "children count should match" + ); + for (e, a) in children.iter().zip(node.children()) { + e.assert_match(a); + } + asserted_children = true; } } } + + assert!( + asserted_children || node.children().count() == 0, + "children were not asserted" + ); + assert!( + asserted_text + || !matches!( + ast.value, + NodeValue::Math(_) + | NodeValue::CodeBlock(_) + | NodeValue::Text(_) + | NodeValue::Link(_) + | NodeValue::Image(_) + | NodeValue::FrontMatter(_) + ), + "text wasn't asserted" + ); } } diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index 36f524b8..9fb766cd 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -290,7 +290,7 @@ fn sourcepos_correctly_restores_context() { (text (1:5-1:7) "cde") ]) (text (1:9-1:9) " ") - (link (1:10-1:15) [ + (link (1:10-1:15) "mailto:f@g.ee" [ (text (1:10-1:15) "f@g.ee") ]) (text (1:16-1:17) " h") @@ -409,21 +409,21 @@ fn autolink_sourcepos() { (document (1:1-5:17) [ (paragraph (1:1-1:13) [ (text (1:1-1:3) "a ") - (link (1:4-1:10) [ + (link (1:4-1:10) "http://www.com" [ (text (1:4-1:10) "www.com") ]) (text (1:11-1:13) " x") ]) (paragraph (3:1-3:21) [ (text (3:1-3:3) "b ") - (link (3:4-3:18) [ + (link (3:4-3:18) "https://www.com" [ (text (3:4-3:18) "https://www.com") ]) (text (3:19-3:21) " y") ]) (paragraph (5:1-5:17) [ (text (5:1-5:3) "c ") - (link (5:4-5:14) [ + (link (5:4-5:14) "mailto:foo@www.com" [ (text (5:4-5:14) "foo@www.com") ]) (text (5:15-5:17) " z") diff --git a/src/tests/core.rs b/src/tests/core.rs index 714ced6b..d8df80f9 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -523,7 +523,7 @@ fn 
link_sourcepos_baseline() { "[ABCD](/)\n", (document (1:1-1:9) [ (paragraph (1:1-1:9) [ - (link (1:1-1:9) [ + (link (1:1-1:9) "/" [ (text (1:2-1:5) "ABCD") ]) ]) @@ -539,7 +539,7 @@ fn link_sourcepos_newline() { "[AB\nCD](/)\n", (document (1:1-2:6) [ (paragraph (1:1-2:6) [ - (link (1:1-2:6) [ + (link (1:1-2:6) "/" [ (text (1:2-1:3) "AB") (softbreak (1:4-1:4)) (text (2:1-2:2) "CD") @@ -560,8 +560,8 @@ fn link_sourcepos_truffle() { (paragraph (1:3-2:18) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) - (link (2:1-2:18) [ - (image (2:2-2:13) [ + (link (2:1-2:18) "/B" [ + (image (2:2-2:13) "/B.png" [ (text (2:4-2:4) "B") ]) ]) @@ -583,8 +583,8 @@ fn link_sourcepos_truffle_twist() { (paragraph (1:3-2:20) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) - (link (2:3-2:20) [ - (image (2:4-2:15) [ + (link (2:3-2:20) "/B" [ + (image (2:4-2:15) "/B.png" [ (text (2:6-2:6) "B") ]) ]) @@ -606,8 +606,8 @@ fn link_sourcepos_truffle_bergamot() { (paragraph (1:3-2:21) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) - (link (2:4-2:21) [ - (image (2:5-2:16) [ + (link (2:4-2:21) "/B" [ + (image (2:5-2:16) "/B.png" [ (text (2:7-2:7) "B") ]) ]) diff --git a/src/tests/front_matter.rs b/src/tests/front_matter.rs index 60580eb5..548f7ab2 100644 --- a/src/tests/front_matter.rs +++ b/src/tests/front_matter.rs @@ -26,72 +26,64 @@ fn round_trip_wide_delimiter() { assert_eq!(&String::from_utf8(buf).unwrap(), input); } -#[test] -fn ast_wide_delimiter() { - let input = "\u{04fc}\nlayout: post\n\u{04fc}\nText\n"; - - assert_ast_match_i( - input, - ast!((document (1:1-4:4) [ - (frontmatter (1:1-3:2) []) - (paragraph (4:1-4:4) [ - (text (4:1-4:4) []) - ]) - ])), - |opts| opts.extension.front_matter_delimiter = Some("\u{04fc}".to_owned()), - ); -} - #[test] fn ast() { - let input = "q\nlayout: post\nq\nText\n"; - - assert_ast_match_i( - input, - ast!((document (1:1-4:4) [ - (frontmatter (1:1-3:1) []) + assert_ast_match!( + [extension.front_matter_delimiter = Some("q".to_owned())], + "q\nlayout: 
post\nq\nText\n", + (document (1:1-4:4) [ + (frontmatter (1:1-3:1) "q\nlayout: post\nq\n") (paragraph (4:1-4:4) [ - (text (4:1-4:4) []) + (text (4:1-4:4) "Text") ]) - ])), - |opts| opts.extension.front_matter_delimiter = Some("q".to_owned()), + ]) ); } #[test] fn ast_blank_line() { - let input = r#"--- + assert_ast_match!( + [extension.front_matter_delimiter = Some("---".to_owned())], + r#"--- a: b --- hello world -"#; - - assert_ast_match_i( - input, - ast!((document (1:1-5:11) [ - (frontmatter (1:1-3:3) []) +"#, + (document (1:1-5:11) [ + (frontmatter (1:1-3:3) "---\na: b\n---\n\n") (paragraph (5:1-5:11) [ - (text (5:1-5:11) []) + (text (5:1-5:11) "hello world") ]) - ])), - |opts| opts.extension.front_matter_delimiter = Some("---".to_owned()), + ]) ); } #[test] fn ast_carriage_return() { - let input = "q\r\nlayout: post\r\nq\r\nText\r\n"; + assert_ast_match!( + [extension.front_matter_delimiter = Some("q".to_owned())], + "q\r\nlayout: post\r\nq\r\nText\r\n", + (document (1:1-4:4) [ + (frontmatter (1:1-3:1) "q\r\nlayout: post\r\nq\r\n") + (paragraph (4:1-4:4) [ + (text (4:1-4:4) "Text") + ]) + ]) + ); +} - assert_ast_match_i( - input, - ast!((document (1:1-4:4) [ - (frontmatter (1:1-3:1) []) +#[test] +fn ast_wide_delimiter() { + assert_ast_match!( + [extension.front_matter_delimiter = Some("\u{04fc}".to_owned())], + "\u{04fc}\nlayout: post\n\u{04fc}\nText\n", + (document (1:1-4:4) [ + (frontmatter (1:1-3:2) "\u{04fc}\nlayout: post\n\u{04fc}\n") (paragraph (4:1-4:4) [ - (text (4:1-4:4) []) + (text (4:1-4:4) "Text") ]) - ])), - |opts| opts.extension.front_matter_delimiter = Some("q".to_owned()), + ]) ); } diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 5be56726..bc7a58a6 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -1,4 +1,4 @@ -use super::{html, html_opts}; +use super::*; #[test] fn pointy_brace_open() { @@ -72,21 +72,30 @@ fn bracket_match() { #[test] fn trailing_hyphen() { - html_opts!( - [extension.autolink, parse.smart, render.sourcepos], 
+ assert_ast_match!( + [extension.autolink, parse.smart], "3@.l-", - "

3@.l-

\n" + (document (1:1-1:5) [ + (paragraph (1:1-1:5) [ + (text (1:1-1:5) "3@.l-") + ]) + ]) ); } +#[ignore] #[test] fn trailing_hyphen_matches() { - html_opts!( - [extension.autolink, parse.smart, render.sourcepos], + assert_ast_match!( + [extension.autolink, parse.smart], "3@.l--", - "

3@.l

\n", - no_roundtrip // We serialise the link back to <3@.l>, which doesn't - // parse as a classic autolink, but the email inside the - // <...> does, meaning the get rendered! + (document (1:1-1:6) [ + (paragraph (1:1-1:6) [ + (link (1:1-1:4) [ + (text (1:1-1:4) "3@.l") + ]) + (text (1:5-1:6) "–") + ]) + ]) ); } From 33363de759e414d0ed87551145f15385155e03a7 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 14:43:45 +1100 Subject: [PATCH 15/30] autolink: fix up email sourcepos in presence of smart punctuation. --- src/parser/autolink.rs | 94 ++++++++++++++++++++++++++++-------------- src/parser/mod.rs | 47 ++++++++++----------- src/tests/autolink.rs | 23 ----------- src/tests/fuzz.rs | 58 +++++++++++++++++++++----- 4 files changed, 134 insertions(+), 88 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index b9bdd346..dd9ecabe 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -9,12 +9,13 @@ use unicode_categories::UnicodeCategories; // TODO: this can probably be cleaned up a lot. It used to handle all three of // {url,www,email}_match, but now just the last of those. (This is still per // upstream cmark-gfm, so it's not easily changed without breaking compat.) -pub(crate) fn process_autolinks<'a>( +pub(crate) fn process_email_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, contents_str: &mut String, relaxed_autolinks: bool, sourcepos: &mut Sourcepos, + spx: &[(Sourcepos, usize)], ) { let contents = contents_str.as_bytes(); let len = contents.len(); @@ -56,38 +57,56 @@ pub(crate) fn process_autolinks<'a>( i -= reverse; node.insert_after(post); - // Enormous HACK. 
This compensates for our lack of accounting for - // smart puncutation in sourcepos in exactly one extant regression - // test (tests::fuzz::trailing_hyphen_matches) and one fuzzer - // crash observed so far, but it is no substitute, probably fails - // miserably in other contexts, and we really instead just need an - // actual solution to that issue. - // - // Problems as follows: - // * We almost certainly need to measure `skip` the same way. - // * We're counting extremely arbitrary Unicode characters, which - // don't _necessarily_ map 1:1 with the transformation done by - // smart punctuation. - // * I've briefly forgotten _how_ this actually fixes anything. - let len_less_i = str::from_utf8(&contents[i..]).unwrap().chars().count(); - - if skip < len_less_i { + let remain = if i + skip < len { let remain = str::from_utf8(&contents[i + skip..]).unwrap(); assert!(!remain.is_empty()); - post.insert_after(make_inline( - arena, - NodeValue::Text(remain.to_string()), - ( - sourcepos.end.line, - sourcepos.end.column + 1 - (len_less_i - skip), - sourcepos.end.line, - sourcepos.end.column, - ) - .into(), - )); - } + Some(remain.to_string()) + } else { + None + }; + let initial_end_col = sourcepos.end.column; - sourcepos.end.column -= len_less_i; + // Sourcepos end column `e` of the original node (set by writing + // to `*sourcepos`) determined by advancing through `spx` until `i` + // bytes of input are seen. + // + // For each element `(sp, x)` in `spx`: + // - if remaining `i` is greater than the byte count `x`, + // set `i -= x` and continue. + // - if remaining `i` is equal to the byte count `x`, + // set `e = sp.end.column - 1` and finish. + // - if remaining `i` is less than the byte count `x`, + // assert `sp.end.column - sp.start.column + 1 == x` (1), + // set `e = sp.start.column + i - 1` and finish. + // + // (1) If `x` doesn't equal the range covered between the start + // and end column, there's no way to determine sourcepos within + // the range. 
This is a bug if it happens; it suggests we've + // matched an email autolink with some smart punctuation in it, + // or worse. + // + // NOTE: a little iffy on the way I've added `- 1` --- what we + // calculate here technically is the start column of the linked + // portion, then adjusted. I think this should be robust, but needs + // checking at edges. + + let mut rem_i = i; + for &(sp, x) in spx { + if rem_i > x { + rem_i -= x; + } else if rem_i == x { + sourcepos.end.column = sp.end.column - 1; + rem_i = 0; + break; + } else { + // rem_i < x + assert_eq!(sp.end.column - sp.start.column + 1, x); + sourcepos.end.column = sp.start.column + rem_i - 1; + rem_i = 0; + break; + } + } + assert!(rem_i == 0); contents_str.truncate(i); let nsp: Sourcepos = ( @@ -101,6 +120,21 @@ pub(crate) fn process_autolinks<'a>( // Inner text gets same sourcepos as link, since there's nothing but // the text. post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; + + if let Some(remain) = remain { + post.insert_after(make_inline( + arena, + NodeValue::Text(remain.to_string()), + ( + sourcepos.end.line, + nsp.end.column + 1, + sourcepos.end.line, + initial_end_col, + ) + .into(), + )); + } + return; } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1b724866..743a88b3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2955,38 +2955,33 @@ where let n_ast = &mut n.data.borrow_mut(); let mut sourcepos = n_ast.sourcepos; - loop { - match n_ast.value { - // Join adjacent text nodes together - NodeValue::Text(ref mut root) => { - let ns = match n.next_sibling() { - Some(ns) => ns, - _ => { - // Post-process once we are finished joining text nodes - self.postprocess_text_node(n, root, &mut sourcepos); - break; - } - }; - + match n_ast.value { + NodeValue::Text(ref mut root) => { + // Join adjacent text nodes together, then post-process. + // Record the original list of sourcepos and bytecounts + // for the post-processing step. 
+ let mut spx = vec![(sourcepos, root.len())]; + while let Some(ns) = n.next_sibling() { match ns.data.borrow().value { NodeValue::Text(ref adj) => { root.push_str(adj); - sourcepos.end.column = ns.data.borrow().sourcepos.end.column; + let sp = ns.data.borrow().sourcepos; + spx.push((sp, adj.len())); + sourcepos.end.column = sp.end.column; ns.detach(); } - _ => { - // Post-process once we are finished joining text nodes - self.postprocess_text_node(n, root, &mut sourcepos); - break; - } + _ => break, } } - NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) => { - this_bracket = true; - break; - } - _ => break, + + self.postprocess_text_node(n, root, &mut sourcepos, &spx); + } + NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) => { + // Don't recurse into links (no links-within-links) or + // images (title part). + this_bracket = true; } + _ => {} } n_ast.sourcepos = sourcepos; @@ -3009,18 +3004,20 @@ where node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, + spx: &[(Sourcepos, usize)], ) { if self.options.extension.tasklist { self.process_tasklist(node, text, sourcepos); } if self.options.extension.autolink { - autolink::process_autolinks( + autolink::process_email_autolinks( self.arena, node, text, self.options.parse.relaxed_autolinks, sourcepos, + spx, ); } } diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index 9fb766cd..1b01a9c0 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -239,29 +239,6 @@ fn autolink_relaxed_links_schemes() { #[test] fn sourcepos_correctly_restores_context() { - // There's unsoundness in trying to maintain and adjust sourcepos - // when doing autolinks in the light of: - // - // a) Some source elements introducing a different number of characters - // to the content text than they take in source, i.e. smart - // punctuation. - // - // b) Text node consolidation happening before autolinking. 
- // - // (b) is obviously non-optional, but it means we end up with Text - // nodes with different byte counts than their sourcepos span lengths. - // - // One possible solution would be to actually accumulate multiple - // sourcepos spans per Text node, each also tracking the number of - // bytes of content text it's responsible for. This would work well - // enough as long as we never had to adjust a sourcepos into a spot - // within a sourcepos span that had a target text width where it - // wasn't equal. That probably wouldn't happen, though -- i.e. we're - // never autolinking into the middle of a rendered smart punctuation. - // - // The below documents test cases that used to break; I suspect smart - // punctuation is still a breaking case however! - assert_ast_match!( [], "ab _cde_ f@g.ee h*ijklm* n", diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index bc7a58a6..9be71e54 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -83,19 +83,57 @@ fn trailing_hyphen() { ); } -#[ignore] #[test] fn trailing_hyphen_matches() { + // TODO: repeat below without smart, direct entry + // TODO: "\ at EOL" style break breaks sourcepos + // TODO: check no empty text node before autolink at start assert_ast_match!( [extension.autolink, parse.smart], - "3@.l--", - (document (1:1-1:6) [ - (paragraph (1:1-1:6) [ - (link (1:1-1:4) [ - (text (1:1-1:4) "3@.l") - ]) - (text (1:5-1:6) "–") - ]) - ]) + "--\n" + "--(3@.l--\n", + (document (1:1-2:9) [ + (paragraph (1:1-2:9) [ + (text (1:1-1:2) "–") // en-dash + (softbreak (1:3-1:3)) + (text (2:1-2:3) "–(") // en-dash + (link (2:4-2:7) "mailto:3@.l" [ + (text (2:4-2:7) "3@.l") + ]) + (text (2:8-2:9) "–") // en-dash + ]) + ]) + ); +} + +#[test] +fn smart_sourcepos() { + assert_ast_match!( + [parse.smart], + ": _--_ **---**\n\n" + // As above, but entered directly. 
+ ": _–_ **—**\n", + (document (1:1-3:15) [ + (paragraph (1:1-1:14) [ + (text (1:1-1:2) ": ") + (emph (1:3-1:6) [ + (text (1:4-1:5) "–") // en-dash + ]) + (text (1:7-1:7) " ") + (strong (1:8-1:14) [ + (text (1:10-1:12) "—") // em-dash + ]) + ]) + (paragraph (3:1-3:15) [ + (text (3:1-3:2) ": ") + (emph (3:3-3:7) [ + (text (3:4-3:6) "–") // en-dash; 3 bytes in input + ]) + (text (3:8-3:8) " ") + (strong (3:9-3:15) [ + (text (3:11-3:13) "—") // em-dash; (still) 3 bytes + ]) + ]) + ]) ); } From 21eae3012f7a12e484e93e945614fb9cac2ab324 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 18:17:55 +1100 Subject: [PATCH 16/30] parser: remove empty node left over by postprocessing. --- src/parser/mod.rs | 8 +++++++- src/tests/autolink.rs | 19 +++++++++++++++++++ src/tests/fuzz.rs | 39 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 743a88b3..b0a81c52 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2952,6 +2952,7 @@ where while let Some(n) = nch { let mut this_bracket = false; + let mut emptied = false; let n_ast = &mut n.data.borrow_mut(); let mut sourcepos = n_ast.sourcepos; @@ -2975,6 +2976,7 @@ where } self.postprocess_text_node(n, root, &mut sourcepos, &spx); + emptied = root.len() == 0; } NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) 
=> { // Don't recurse into links (no links-within-links) or @@ -2986,11 +2988,15 @@ where n_ast.sourcepos = sourcepos; - if !this_bracket { + if !this_bracket && !emptied { children.push(n); } nch = n.next_sibling(); + + if emptied { + n.detach(); + } } // Push children onto work stack in reverse order so they are diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index 1b01a9c0..7f3c1f6b 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -408,3 +408,22 @@ fn autolink_sourcepos() { ]) ); } + +#[test] +fn autolink_consecutive_email() { + assert_ast_match!( + [extension.autolink], + "scyther@pokemon.com/beedrill@pokemon.com", + (document (1:1-1:40) [ + (paragraph (1:1-1:40) [ + (link (1:1-1:19) "mailto:scyther@pokemon.com" [ + (text (1:1-1:19) "scyther@pokemon.com") + ]) + (text (1:20-1:20) "/") + (link (1:21-1:40) "mailto:beedrill@pokemon.com" [ + (text (1:21-1:40) "beedrill@pokemon.com") + ]) + ]) + ]) + ); +} diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 9be71e54..eabd5b5a 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -84,10 +84,8 @@ fn trailing_hyphen() { } #[test] -fn trailing_hyphen_matches() { - // TODO: repeat below without smart, direct entry +fn trailing_smart_endash_matches() { // TODO: "\ at EOL" style break breaks sourcepos - // TODO: check no empty text node before autolink at start assert_ast_match!( [extension.autolink, parse.smart], "--\n" @@ -106,6 +104,41 @@ fn trailing_hyphen_matches() { ); } +#[test] +fn trailing_endash_matches() { + assert_ast_match!( + [extension.autolink], + "–\n" + "–(3@.l–\n", + (document (1:1-2:11) [ + (paragraph (1:1-2:11) [ + (text (1:1-1:3) "–") // en-dash + (softbreak (1:4-1:4)) + (text (2:1-2:4) "–(") // en-dash + (link (2:5-2:8) "mailto:3@.l" [ + (text (2:5-2:8) "3@.l") + ]) + (text (2:9-2:11) "–") // en-dash + ]) + ]) + ); +} + +#[test] +fn no_empty_text_before_email() { + assert_ast_match!( + [extension.autolink], + "a@b.c\n", + (document (1:1-1:5) [ + (paragraph 
(1:1-1:5) [ + (link (1:1-1:5) "mailto:a@b.c" [ + (text (1:1-1:5) "a@b.c") + ]) + ]) + ]) + ); +} + #[test] fn smart_sourcepos() { assert_ast_match!( From 6849733bec144c7c2bbdece856e3a782bc7ec7c4 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 18:23:25 +1100 Subject: [PATCH 17/30] inlines: fix LineBreak sourcepos from backslash. --- src/parser/inlines.rs | 6 +++++- src/tests/fuzz.rs | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 25df673c..2fabba57 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1208,7 +1208,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { inline_text } } else if !self.eof() && self.skip_line_end() { - self.make_inline(NodeValue::LineBreak, startpos, self.pos - 1) + let inl = self.make_inline(NodeValue::LineBreak, startpos, self.pos - 1); + self.line += 1; + self.column_offset = -(self.pos as isize); + self.skip_spaces(); + inl } else { self.make_inline( NodeValue::Text("\\".to_string()), diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index eabd5b5a..94fa4ba8 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -170,3 +170,19 @@ fn smart_sourcepos() { ]) ); } + +#[test] +fn linebreak_sourcepos() { + assert_ast_match!( + [], + "a\\\n" + "b\n", + (document (1:1-2:1) [ + (paragraph (1:1-2:1) [ + (text (1:1-1:1) "a") + (linebreak (1:2-1:3)) + (text (2:1-2:1) "b") + ]) + ]) + ); +} From 4ed12c7f097707ce6c193b8789bd7a8528aa5c8c Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 18:33:36 +1100 Subject: [PATCH 18/30] autolink: correct "=" case. 
--- flake.nix | 2 ++ src/parser/autolink.rs | 33 ++++++++++----------- src/parser/inlines.rs | 67 +++++++++++++++++++----------------------- src/tests/fuzz.rs | 20 +++++++++++++ 4 files changed, 68 insertions(+), 54 deletions(-) diff --git a/flake.nix b/flake.nix index 4a1d797f..7275de65 100644 --- a/flake.nix +++ b/flake.nix @@ -114,6 +114,8 @@ formatter = pkgs.alejandra; devShells.default = pkgs.mkShell { + name = "comrak"; + inputsFrom = builtins.attrValues self.checks.${system}; nativeBuildInputs = [ diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index dd9ecabe..b9e1b22d 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -74,10 +74,10 @@ pub(crate) fn process_email_autolinks<'a>( // - if remaining `i` is greater than the byte count `x`, // set `i -= x` and continue. // - if remaining `i` is equal to the byte count `x`, - // set `e = sp.end.column - 1` and finish. + // set `e = sp.end.column` and finish. // - if remaining `i` is less than the byte count `x`, // assert `sp.end.column - sp.start.column + 1 == x` (1), - // set `e = sp.start.column + i - 1` and finish. + // set `e = sp.start.column + i - 1` (2) and finish. // // (1) If `x` doesn't equal the range covered between the start // and end column, there's no way to determine sourcepos within @@ -85,17 +85,17 @@ pub(crate) fn process_email_autolinks<'a>( // matched an email autolink with some smart punctuation in it, // or worse. // - // NOTE: a little iffy on the way I've added `- 1` --- what we - // calculate here technically is the start column of the linked - // portion, then adjusted. I think this should be robust, but needs - // checking at edges. + // (2) A little iffy on the way I've added `- 1` --- what we + // calculate here technically is the start column of the linked + // portion, then adjusted. I think this should be robust, but + // needs checking at edges. 
let mut rem_i = i; for &(sp, x) in spx { if rem_i > x { rem_i -= x; } else if rem_i == x { - sourcepos.end.column = sp.end.column - 1; + sourcepos.end.column = sp.end.column; rem_i = 0; break; } else { @@ -122,17 +122,14 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { - post.insert_after(make_inline( - arena, - NodeValue::Text(remain.to_string()), - ( - sourcepos.end.line, - nsp.end.column + 1, - sourcepos.end.line, - initial_end_col, - ) - .into(), - )); + let asp: Sourcepos = ( + sourcepos.end.line, + nsp.end.column + 1, + sourcepos.end.line, + initial_end_col, + ) + .into(); + post.insert_after(make_inline(arena, NodeValue::Text(remain.to_string()), asp)); } return; diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 2fabba57..29c4e3fa 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -811,43 +811,38 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } else { self.scan_to_closing_dollar(opendollars) } - .filter(|epos| epos - startpos >= fence_length * 2 + 1); + .filter(|endpos| endpos - startpos >= fence_length * 2 + 1); - match endpos { - None => { - if code_math { - self.pos = startpos + 1; - self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1) - } else { - self.pos = startpos + fence_length; - self.make_inline( - NodeValue::Text("$".repeat(opendollars)), - self.pos - fence_length, - self.pos - 1, - ) - } - } - Some(endpos) => { - let buf = &self.input[startpos + fence_length..endpos - fence_length]; - let buf: Vec = if code_math || opendollars == 1 { - strings::normalize_code(buf) - } else { - buf.to_vec() - }; - let math = NodeMath { - dollar_math: !code_math, - display_math: opendollars == 2, - literal: String::from_utf8(buf).unwrap(), - }; - let node = self.make_inline(NodeValue::Math(math), startpos, endpos - 1); - self.adjust_node_newlines( - node, - endpos - startpos - fence_length, - 
fence_length, - parent_line_offsets, - ); - node - } + if let Some(endpos) = endpos { + let buf = &self.input[startpos + fence_length..endpos - fence_length]; + let buf: Vec = if code_math || opendollars == 1 { + strings::normalize_code(buf) + } else { + buf.to_vec() + }; + let math = NodeMath { + dollar_math: !code_math, + display_math: opendollars == 2, + literal: String::from_utf8(buf).unwrap(), + }; + let node = self.make_inline(NodeValue::Math(math), startpos, endpos - 1); + self.adjust_node_newlines( + node, + endpos - startpos - fence_length, + fence_length, + parent_line_offsets, + ); + node + } else if code_math { + self.pos = startpos + 1; + self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1) + } else { + self.pos = startpos + fence_length; + self.make_inline( + NodeValue::Text("$".repeat(opendollars)), + self.pos - fence_length, + self.pos - 1, + ) } } diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 94fa4ba8..6aa6d9fe 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -186,3 +186,23 @@ fn linebreak_sourcepos() { ]) ); } + +#[test] +fn echaw() { + assert_ast_match!( + [extension.autolink], + " Date: Thu, 27 Feb 2025 18:33:36 +1100 Subject: [PATCH 19/30] autolink: deal with consecutive replacements. 
--- src/parser/autolink.rs | 97 +++++++++++++++++++++++++++++++--------- src/parser/mod.rs | 11 ++--- src/tests/autolink.rs | 19 ++++++++ src/tests/fuzz.rs | 20 +++++++++ src/tests/regressions.rs | 62 +++++++++++++++++++++++++ 5 files changed, 183 insertions(+), 26 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index b9e1b22d..52de43b6 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -2,20 +2,18 @@ use crate::character_set::character_set; use crate::ctype::{isalnum, isalpha, isspace}; use crate::nodes::{AstNode, NodeLink, NodeValue, Sourcepos}; use crate::parser::inlines::make_inline; +use std::collections::VecDeque; use std::str; use typed_arena::Arena; use unicode_categories::UnicodeCategories; -// TODO: this can probably be cleaned up a lot. It used to handle all three of -// {url,www,email}_match, but now just the last of those. (This is still per -// upstream cmark-gfm, so it's not easily changed without breaking compat.) pub(crate) fn process_email_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, contents_str: &mut String, relaxed_autolinks: bool, sourcepos: &mut Sourcepos, - spx: &[(Sourcepos, usize)], + mut spx: VecDeque<(Sourcepos, usize)>, ) { let contents = contents_str.as_bytes(); let len = contents.len(); @@ -90,23 +88,37 @@ pub(crate) fn process_email_autolinks<'a>( // portion, then adjusted. I think this should be robust, but // needs checking at edges. 
- let mut rem_i = i; - for &(sp, x) in spx { - if rem_i > x { - rem_i -= x; - } else if rem_i == x { - sourcepos.end.column = sp.end.column; - rem_i = 0; - break; - } else { - // rem_i < x - assert_eq!(sp.end.column - sp.start.column + 1, x); - sourcepos.end.column = sp.start.column + rem_i - 1; - rem_i = 0; - break; + if i > 0 { + let mut rem_i = i; + while let Some((sp, x)) = spx.pop_front() { + if rem_i > x { + rem_i -= x; + } else if rem_i == x { + sourcepos.end.column = sp.end.column; + rem_i = 0; + break; + } else { + // rem_i < x + assert_eq!(sp.end.column - sp.start.column + 1, x); + sourcepos.end.column = sp.start.column + rem_i - 1; + spx.push_front(( + ( + sp.start.line, + sp.start.column + rem_i, + sp.end.line, + sp.end.column, + ) + .into(), + x - rem_i, + )); + rem_i = 0; + break; + } } + assert!(rem_i == 0); + } else { + sourcepos.end.column = 0; } - assert!(rem_i == 0); contents_str.truncate(i); let nsp: Sourcepos = ( @@ -122,14 +134,57 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { - let asp: Sourcepos = ( + let mut asp: Sourcepos = ( sourcepos.end.line, nsp.end.column + 1, sourcepos.end.line, initial_end_col, ) .into(); - post.insert_after(make_inline(arena, NodeValue::Text(remain.to_string()), asp)); + + // Consume `skip` from spx. 
+ let mut rem_skip = skip; + while let Some((sp, x)) = spx.pop_front() { + if rem_skip > x { + rem_skip -= x; + } else if rem_skip == x { + rem_skip = 0; + break; + } else { + // rem_skip < x + assert_eq!(sp.end.column - sp.start.column + 1, x); + spx.push_front(( + ( + sp.start.line, + sp.start.column + rem_skip, + sp.end.line, + sp.end.column, + ) + .into(), + x - rem_skip, + )); + rem_skip = 0; + break; + } + } + assert!(rem_skip == 0); + + let after = make_inline(arena, NodeValue::Text(remain.to_string()), asp); + post.insert_after(after); + + let after_ast = &mut after.data.borrow_mut(); + process_email_autolinks( + arena, + after, + match after_ast.value { + NodeValue::Text(ref mut t) => t, + _ => unreachable!(), + }, + relaxed_autolinks, + &mut asp, + spx, + ); + after_ast.sourcepos = asp; } return; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b0a81c52..188ff2a8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -21,7 +21,7 @@ use crate::scanners::{self, SetextChar}; use crate::strings::{self, split_off_front_matter, Case}; use std::cell::RefCell; use std::cmp::min; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::fmt::{self, Debug, Formatter}; use std::mem; use std::panic::RefUnwindSafe; @@ -2961,13 +2961,14 @@ where // Join adjacent text nodes together, then post-process. // Record the original list of sourcepos and bytecounts // for the post-processing step. 
- let mut spx = vec![(sourcepos, root.len())]; + let mut spx = VecDeque::new(); + spx.push_back((sourcepos, root.len())); while let Some(ns) = n.next_sibling() { match ns.data.borrow().value { NodeValue::Text(ref adj) => { root.push_str(adj); let sp = ns.data.borrow().sourcepos; - spx.push((sp, adj.len())); + spx.push_back((sp, adj.len())); sourcepos.end.column = sp.end.column; ns.detach(); } @@ -2975,7 +2976,7 @@ where } } - self.postprocess_text_node(n, root, &mut sourcepos, &spx); + self.postprocess_text_node(n, root, &mut sourcepos, spx); emptied = root.len() == 0; } NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) => { @@ -3010,7 +3011,7 @@ where node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, - spx: &[(Sourcepos, usize)], + spx: VecDeque<(Sourcepos, usize)>, ) { if self.options.extension.tasklist { self.process_tasklist(node, text, sourcepos); diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index 7f3c1f6b..997a5d51 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -427,3 +427,22 @@ fn autolink_consecutive_email() { ]) ); } + +#[test] +fn autolink_consecutive_email_smart() { + assert_ast_match!( + [extension.autolink, parse.smart], + "scyther@pokemon.com--beedrill@pokemon.com", + (document (1:1-1:41) [ + (paragraph (1:1-1:41) [ + (link (1:1-1:19) "mailto:scyther@pokemon.com" [ + (text (1:1-1:19) "scyther@pokemon.com") + ]) + (text (1:20-1:21) "–") // en-dash + (link (1:22-1:41) "mailto:beedrill@pokemon.com" [ + (text (1:22-1:41) "beedrill@pokemon.com") + ]) + ]) + ]) + ); +} diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 6aa6d9fe..10601e88 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -206,3 +206,23 @@ fn echaw() { ]) ); } + +#[test] +fn echaw2() { + assert_ast_match!( + [extension.autolink, parse.smart], + ":C@.t'C@.t", + (document (1:1-1:10) [ + (paragraph (1:1-1:10) [ + (text (1:1-1:1) ":") + (link (1:2-1:5) "mailto:C@.t" [ + (text (1:2-1:5) "C@.t") + ]) + (text 
(1:6-1:6) "’") + (link (1:7-1:10) "mailto:C@.t" [ + (text (1:7-1:10) "C@.t") + ]) + ]) + ]) + ); +} diff --git a/src/tests/regressions.rs b/src/tests/regressions.rs index 8d54a217..5e98a404 100644 --- a/src/tests/regressions.rs +++ b/src/tests/regressions.rs @@ -177,3 +177,65 @@ fn sourcepos_link_items() { ]) ); } + +#[test] +fn assorted_links() { + assert_ast_match!( + [extension.autolink], + r#"hello world +hello [foo](https://example.com) world +hello [foo] world +hello [bar][bar] world +hello https://example.com/foo world +hello www.example.com world +hello foo@example.com world + +[foo]: https://example.com +[bar]: https://example.com"#, + (document (1:1-10:26) [ + (paragraph (1:1-7:27) [ + (text (1:1-1:6) "hello ") + (link (1:7-1:32) "https://example.com/fooo" [ + (text (1:8-1:31) "https://example.com/fooo") + ]) + (text (1:33-1:38) " world") + (softbreak (1:39-1:39)) + (text (2:1-2:6) "hello ") + (link (2:7-2:32) "https://example.com" [ + (text (2:8-2:10) "foo") + ]) + (text (2:33-2:38) " world") + (softbreak (2:39-2:39)) + (text (3:1-3:6) "hello ") + (link (3:7-3:11) "https://example.com" [ + (text (3:8-3:10) "foo") + ]) + (text (3:12-3:17) " world") + (softbreak (3:18-3:18)) + (text (4:1-4:6) "hello ") + (link (4:7-4:16) "https://example.com" [ + (text (4:8-4:10) "bar") + ]) + (text (4:17-4:22) " world") + (softbreak (4:23-4:23)) + (text (5:1-5:6) "hello ") + (link (5:7-5:29) "https://example.com/foo" [ + (text (5:7-5:29) "https://example.com/foo") + ]) + (text (5:30-5:35) " world") + (softbreak (5:36-5:36)) + (text (6:1-6:6) "hello ") + (link (6:7-6:21) "http://www.example.com" [ + (text (6:7-6:21) "www.example.com") + ]) + (text (6:22-6:27) " world") + (softbreak (6:28-6:28)) + (text (7:1-7:6) "hello ") + (link (7:7-7:21) "mailto:foo@example.com" [ + (text (7:7-7:21) "foo@example.com") + ]) + (text (7:22-7:27) " world") + ]) + ]) + ); +} From 1d030cf0900af1150300abe75d729736946868f8 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 
20:41:06 +1100 Subject: [PATCH 20/30] autolink: refactor consume_spx. --- src/parser/autolink.rs | 132 +++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 83 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 52de43b6..4f6a0b98 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -64,61 +64,7 @@ pub(crate) fn process_email_autolinks<'a>( }; let initial_end_col = sourcepos.end.column; - // Sourcepos end column `e` of the original node (set by writing - // to `*sourcepos`) determined by advancing through `spx` until `i` - // bytes of input are seen. - // - // For each element `(sp, x)` in `spx`: - // - if remaining `i` is greater than the byte count `x`, - // set `i -= x` and continue. - // - if remaining `i` is equal to the byte count `x`, - // set `e = sp.end.column` and finish. - // - if remaining `i` is less than the byte count `x`, - // assert `sp.end.column - sp.start.column + 1 == x` (1), - // set `e = sp.start.column + i - 1` (2) and finish. - // - // (1) If `x` doesn't equal the range covered between the start - // and end column, there's no way to determine sourcepos within - // the range. This is a bug if it happens; it suggests we've - // matched an email autolink with some smart punctuation in it, - // or worse. - // - // (2) A little iffy on the way I've added `- 1` --- what we - // calculate here technically is the start column of the linked - // portion, then adjusted. I think this should be robust, but - // needs checking at edges. 
- - if i > 0 { - let mut rem_i = i; - while let Some((sp, x)) = spx.pop_front() { - if rem_i > x { - rem_i -= x; - } else if rem_i == x { - sourcepos.end.column = sp.end.column; - rem_i = 0; - break; - } else { - // rem_i < x - assert_eq!(sp.end.column - sp.start.column + 1, x); - sourcepos.end.column = sp.start.column + rem_i - 1; - spx.push_front(( - ( - sp.start.line, - sp.start.column + rem_i, - sp.end.line, - sp.end.column, - ) - .into(), - x - rem_i, - )); - rem_i = 0; - break; - } - } - assert!(rem_i == 0); - } else { - sourcepos.end.column = 0; - } + sourcepos.end.column = if i > 0 { consume_spx(&mut spx, i) } else { 0 }; contents_str.truncate(i); let nsp: Sourcepos = ( @@ -134,6 +80,8 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { + consume_spx(&mut spx, skip); + let mut asp: Sourcepos = ( sourcepos.end.line, nsp.end.column + 1, @@ -141,34 +89,6 @@ pub(crate) fn process_email_autolinks<'a>( initial_end_col, ) .into(); - - // Consume `skip` from spx. - let mut rem_skip = skip; - while let Some((sp, x)) = spx.pop_front() { - if rem_skip > x { - rem_skip -= x; - } else if rem_skip == x { - rem_skip = 0; - break; - } else { - // rem_skip < x - assert_eq!(sp.end.column - sp.start.column + 1, x); - spx.push_front(( - ( - sp.start.line, - sp.start.column + rem_skip, - sp.end.line, - sp.end.column, - ) - .into(), - x - rem_skip, - )); - rem_skip = 0; - break; - } - } - assert!(rem_skip == 0); - let after = make_inline(arena, NodeValue::Text(remain.to_string()), asp); post.insert_after(after); @@ -192,6 +112,52 @@ pub(crate) fn process_email_autolinks<'a>( } } +// Sourcepos end column `e` of the original node (set by writing to +// `*sourcepos`) determined by advancing through `spx` until `i` bytes of input +// are seen. +// +// For each element `(sp, x)` in `spx`: +// - if remaining `i` is greater than the byte count `x`, +// set `i -= x` and continue. 
+// - if remaining `i` is equal to the byte count `x`, +// set `e = sp.end.column` and finish. +// - if remaining `i` is less than the byte count `x`, +// assert `sp.end.column - sp.start.column + 1 == x` (1), +// set `e = sp.start.column + i - 1` (2) and finish. +// +// (1) If `x` doesn't equal the range covered between the start and end column, +// there's no way to determine sourcepos within the range. This is a bug if +// it happens; it suggests we've matched an email autolink with some smart +// punctuation in it, or worse. +// +// (2) A little iffy on the way I've added `- 1` --- what we calculate here +// technically is the start column of the linked portion, then adjusted. I +// think this should be robust, but needs checking at edges. +fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize { + while let Some((sp, x)) = spx.pop_front() { + if rem > x { + rem -= x; + } else if rem == x { + return sp.end.column; + } else { + // rem < x + assert_eq!(sp.end.column - sp.start.column + 1, x); + spx.push_front(( + ( + sp.start.line, + sp.start.column + rem, + sp.end.line, + sp.end.column, + ) + .into(), + x - rem, + )); + return sp.start.column + rem - 1; + } + } + unreachable!(); +} + fn email_match<'a>( arena: &'a Arena>, contents: &[u8], From b86e2242923e90e8e3c3647d9375b05aed27d2ee Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 20:41:06 +1100 Subject: [PATCH 21/30] sourcepos: good grief. 
--- src/parser/autolink.rs | 6 +++++- src/parser/inlines.rs | 10 ++++++---- src/strings.rs | 9 +++++++-- src/tests/fuzz.rs | 29 +++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 4f6a0b98..3ffd8f7b 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -64,7 +64,11 @@ pub(crate) fn process_email_autolinks<'a>( }; let initial_end_col = sourcepos.end.column; - sourcepos.end.column = if i > 0 { consume_spx(&mut spx, i) } else { 0 }; + sourcepos.end.column = if i > 0 { + consume_spx(&mut spx, i) + } else { + sourcepos.start.column - 1 + }; contents_str.truncate(i); let nsp: Sourcepos = ( diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 29c4e3fa..4e17f4ac 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -291,16 +291,16 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { '$' => Some(self.handle_dollars(&node_ast.line_offsets)), '|' if self.options.extension.spoiler => Some(self.handle_delim(b'|')), _ => { - let endpos = self.find_special_char(); + let mut endpos = self.find_special_char(); let mut contents = self.input[self.pos..endpos].to_vec(); - let startpos = self.pos; + let mut startpos = self.pos; self.pos = endpos; if self .peek_char() .map_or(false, |&c| strings::is_line_end_char(c)) { - strings::rtrim(&mut contents); + endpos -= strings::rtrim(&mut contents); } // if we've just produced a LineBreak, then we should consume any leading @@ -308,7 +308,9 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { if node.last_child().map_or(false, |n| { matches!(n.data.borrow().value, NodeValue::LineBreak) }) { - strings::ltrim(&mut contents); + // TODO: test this more explicitly. 
+ let n = strings::ltrim(&mut contents); + startpos += n; } Some(self.make_inline( diff --git a/src/strings.rs b/src/strings.rs index 0a13db95..68fc7f82 100644 --- a/src/strings.rs +++ b/src/strings.rs @@ -144,17 +144,19 @@ pub fn chop_trailing_hashtags(line: &mut Vec) { } } -pub fn rtrim(line: &mut Vec) { +pub fn rtrim(line: &mut Vec) -> usize { let spaces = line.iter().rev().take_while(|&&b| isspace(b)).count(); let new_len = line.len() - spaces; line.truncate(new_len); + spaces } -pub fn ltrim(line: &mut Vec) { +pub fn ltrim(line: &mut Vec) -> usize { let spaces = line.iter().take_while(|&&b| isspace(b)).count(); shift_buf_left(line, spaces); let new_len = line.len() - spaces; line.truncate(new_len); + spaces } pub fn trim(line: &mut Vec) { @@ -191,6 +193,9 @@ pub fn trim_slice(mut i: &[u8]) -> &[u8] { } fn shift_buf_left(buf: &mut [u8], n: usize) { + if n == 0 { + return; + } assert!(n <= buf.len()); let keep = buf.len() - n; unsafe { diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 10601e88..6bde3f12 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -226,3 +226,32 @@ fn echaw2() { ]) ); } + +#[test] +fn echaw3() { + assert_ast_match!( + [extension.autolink, parse.smart], + // XXX As an extra special case, NUL bytes are expanded to U+FFFD + // REPLACEMENT CHARACTER (UTF-8: EF BF BD) during the feed stage, so + // sourcepos sees three bytes (!). I might like to change this later. + "c@.r\0\t\r" + "z \n" + " f@.x", + (document (1:1-3:5) [ + (paragraph (1:1-3:5) [ + (link (1:1-1:4) "mailto:c@.r" [ + (text (1:1-1:4) "c@.r") + ]) + (text (1:5-1:7) "�") + // !! Spaces at EOL are trimmed. + // See parser::inlines::Subject::parse_inline's final case. 
+ (softbreak (1:9-1:9)) + (text (2:1-2:1) "z") + (linebreak (2:2-2:4)) + (link (3:2-3:5) "mailto:f@.x" [ + (text (3:2-3:5) "f@.x") + ]) + ]) + ]) + ); +} From dca38dcd15cd0e7c8136842217c35f6fdadc8bfc Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 18:33:36 +1100 Subject: [PATCH 22/30] autolink: no longer iffy on this; record another question. --- src/parser/autolink.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 3ffd8f7b..60e7200c 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -64,11 +64,7 @@ pub(crate) fn process_email_autolinks<'a>( }; let initial_end_col = sourcepos.end.column; - sourcepos.end.column = if i > 0 { - consume_spx(&mut spx, i) - } else { - sourcepos.start.column - 1 - }; + sourcepos.end.column = consume_spx(&mut spx, i); contents_str.truncate(i); let nsp: Sourcepos = ( @@ -84,6 +80,8 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { + // TODO: why we don't need the return value here? What's the + // relationship between it, nsp.end.column, and initial_end_col? consume_spx(&mut spx, skip); let mut asp: Sourcepos = ( @@ -127,16 +125,12 @@ pub(crate) fn process_email_autolinks<'a>( // set `e = sp.end.column` and finish. // - if remaining `i` is less than the byte count `x`, // assert `sp.end.column - sp.start.column + 1 == x` (1), -// set `e = sp.start.column + i - 1` (2) and finish. +// set `e = sp.start.column + i - 1` and finish. // // (1) If `x` doesn't equal the range covered between the start and end column, // there's no way to determine sourcepos within the range. This is a bug if // it happens; it suggests we've matched an email autolink with some smart // punctuation in it, or worse. 
-// -// (2) A little iffy on the way I've added `- 1` --- what we calculate here -// technically is the start column of the linked portion, then adjusted. I -// think this should be robust, but needs checking at edges. fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize { while let Some((sp, x)) = spx.pop_front() { if rem > x { From cba4264d4935cdcd0e092bb72c46c0c410b57dbd Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 21:25:47 +1100 Subject: [PATCH 23/30] autolink: answer this question. --- src/parser/autolink.rs | 5 ++--- src/tests/fuzz.rs | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 60e7200c..05185c9a 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -80,9 +80,8 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { - // TODO: why we don't need the return value here? What's the - // relationship between it, nsp.end.column, and initial_end_col? - consume_spx(&mut spx, skip); + let rs = consume_spx(&mut spx, skip); + assert_eq!(rs, nsp.end.column); let mut asp: Sourcepos = ( sourcepos.end.line, diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 6bde3f12..c25974af 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -85,7 +85,6 @@ fn trailing_hyphen() { #[test] fn trailing_smart_endash_matches() { - // TODO: "\ at EOL" style break breaks sourcepos assert_ast_match!( [extension.autolink, parse.smart], "--\n" From 87aa79c6831a03e68b93585cfce09ff806da80b6 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 21:29:19 +1100 Subject: [PATCH 24/30] tests: new failing test; entities are cursed. 
--- src/tests/fuzz.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index c25974af..3f313855 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -254,3 +254,16 @@ fn echaw3() { ]) ); } + +#[test] +fn echaw4() { + assert_ast_match!( + [extension.autolink, parse.smart], + "-@_.e--", + (document (1:1-1:16) [ + (paragraph (1:1-1:16) [ + (text (1:1-1:16) "-@_.e–") // underbar & en-dash + ]) + ]) + ); +} From 82cb6e6387511e8185eed9c8f924ad469de56710 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Thu, 27 Feb 2025 18:33:36 +1100 Subject: [PATCH 25/30] autolink: consume_spx to find new end.column. --- src/parser/autolink.rs | 8 ++++---- src/tests/fuzz.rs | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 05185c9a..6f629ecb 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -66,12 +66,15 @@ pub(crate) fn process_email_autolinks<'a>( sourcepos.end.column = consume_spx(&mut spx, i); + let nsp_end_col = consume_spx(&mut spx, skip); + contents_str.truncate(i); + let nsp: Sourcepos = ( sourcepos.end.line, sourcepos.end.column + 1, sourcepos.end.line, - sourcepos.end.column + skip, + nsp_end_col, ) .into(); post.data.borrow_mut().sourcepos = nsp; @@ -80,9 +83,6 @@ pub(crate) fn process_email_autolinks<'a>( post.first_child().unwrap().data.borrow_mut().sourcepos = nsp; if let Some(remain) = remain { - let rs = consume_spx(&mut spx, skip); - assert_eq!(rs, nsp.end.column); - let mut asp: Sourcepos = ( sourcepos.end.line, nsp.end.column + 1, diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 3f313855..656491ac 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -257,12 +257,16 @@ fn echaw3() { #[test] fn echaw4() { + // _ resolves to a plain ASCII underscore "_". 
assert_ast_match!( [extension.autolink, parse.smart], "-@_.e--", (document (1:1-1:16) [ (paragraph (1:1-1:16) [ - (text (1:1-1:16) "-@_.e–") // underbar & en-dash + (link (1:1-1:14) "mailto:-@_.e" [ + (text (1:1-1:14) "-@_.e") + ]) + (text (1:15-1:16) "–") // en-dash ]) ]) ); From 642ba7be2c8134f5409d213384d785deefbc112e Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Fri, 28 Feb 2025 14:45:04 +1100 Subject: [PATCH 26/30] inlines: first fixes to emphasis sourcepos. --- src/parser/inlines.rs | 44 ++++++++++++++++++++++++++---------------- src/tests/fuzz.rs | 38 ++++++++++++++++++++++++++++++++++++ src/tests/sourcepos.rs | 4 +++- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 4e17f4ac..646e639c 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -99,6 +99,21 @@ pub struct Delimiter<'a: 'd, 'd> { next: Cell>>, } +impl<'a: 'd, 'd> std::fmt::Debug for Delimiter<'a, 'd> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[pos {}, len {}, delim_char {:?}, open? {} close? 
{} -- {}]", + self.position, + self.length, + self.delim_char, + self.can_open, + self.can_close, + self.inl.data.borrow().sourcepos + ) + } +} + struct Bracket<'a> { inl_text: &'a AstNode<'a>, position: usize, @@ -1136,22 +1151,14 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.pos, self.pos, ); - { - // if we have `___` or `***` then we need to adjust the sourcepos colums by 1 - let triple_adjustment = if opener_num_chars > 0 && use_delims == 2 { - 1 - } else { - 0 - }; - emph.data.borrow_mut().sourcepos = ( - opener.inl.data.borrow().sourcepos.start.line, - opener.inl.data.borrow().sourcepos.start.column + triple_adjustment, - closer.inl.data.borrow().sourcepos.end.line, - closer.inl.data.borrow().sourcepos.end.column - triple_adjustment, - ) - .into(); - } + emph.data.borrow_mut().sourcepos = ( + opener.inl.data.borrow().sourcepos.start.line, + opener.inl.data.borrow().sourcepos.start.column + opener_num_chars, + closer.inl.data.borrow().sourcepos.end.line, + closer.inl.data.borrow().sourcepos.end.column - closer_num_chars, + ) + .into(); // Drop all the interior AST nodes into the emphasis node // and then insert the emphasis node @@ -1167,11 +1174,13 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { } opener.inl.insert_after(emph); - // Drop the delimiters and return the next closer to process - + // Drop completely "used up" delimiters, adjust sourcepos of those not, + // and return the next closest one for processing. 
if opener_num_chars == 0 { opener.inl.detach(); self.remove_delimiter(opener); + } else { + opener.inl.data.borrow_mut().sourcepos.end.column -= use_delims; } if closer_num_chars == 0 { @@ -1179,6 +1188,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c> Subject<'a, 'r, 'o, 'd, 'i, 'c> { self.remove_delimiter(closer); closer.next.get() } else { + closer.inl.data.borrow_mut().sourcepos.start.column += use_delims; Some(closer) } } diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 656491ac..52cf02f8 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -271,3 +271,41 @@ fn echaw4() { ]) ); } + +#[test] +fn echaw5() { + assert_ast_match!( + [], + // [extension.autolink], + "_#___@e.u", + (document (1:1-1:9) [ + (paragraph (1:1-1:9) [ + (emph (1:1-1:3) [ + (text (1:2-1:2) "#") + ]) + // (link (1:1-1:14) "mailto:-@_.e" [ + // (text (1:1-1:14) "-@_.e") + // ]) + (text (1:4-1:9) "__@e.u") + ]) + ]) + ); +} + +#[test] +fn echaw6() { + assert_ast_match!( + [extension.autolink], + "_#___@e.u", + (document (1:1-1:9) [ + (paragraph (1:1-1:9) [ + (emph (1:1-1:3) [ + (text (1:2-1:2) "#") + ]) + (link (1:4-1:9) "mailto:__@e.u" [ + (text (1:4-1:9) "__@e.u") + ]) + ]) + ]) + ); +} diff --git a/src/tests/sourcepos.rs b/src/tests/sourcepos.rs index 19aeb619..5c61468b 100644 --- a/src/tests/sourcepos.rs +++ b/src/tests/sourcepos.rs @@ -394,8 +394,10 @@ const SPOILERED_TEXT: TestCase = ( after"#, ); +// NOTE: I've adjusted this from its original asserted sourcepos (2:1-2:8) while +// fixing emphasis sourcepos. I am not even sure what it is, really. const ESCAPED_TAG: TestCase = ( - &[sourcepos!((2:1-2:8))], + &[sourcepos!((2:2-2:8))], r#"before ||hello| after"#, From 91ad747eb0693192400d33a460f15dbcafde8325 Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Sun, 2 Mar 2025 13:41:40 +1100 Subject: [PATCH 27/30] autolink: identify & permit `rem == 0` case. 
--- src/parser/autolink.rs | 8 ++++++-- src/tests/fuzz.rs | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 6f629ecb..72e2e1e3 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -123,13 +123,17 @@ pub(crate) fn process_email_autolinks<'a>( // - if remaining `i` is equal to the byte count `x`, // set `e = sp.end.column` and finish. // - if remaining `i` is less than the byte count `x`, -// assert `sp.end.column - sp.start.column + 1 == x` (1), +// assert `sp.end.column - sp.start.column + 1 == x || rem == 0` (1), // set `e = sp.start.column + i - 1` and finish. // // (1) If `x` doesn't equal the range covered between the start and end column, // there's no way to determine sourcepos within the range. This is a bug if // it happens; it suggests we've matched an email autolink with some smart // punctuation in it, or worse. +// +// The one exception is if `rem == 0`. Given nothing to consume, we can +// happily restore what we popped, returning `sp.start.column - 1` for the +// end column of the original node. 
fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize { while let Some((sp, x)) = spx.pop_front() { if rem > x { @@ -138,7 +142,7 @@ fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize return sp.end.column; } else { // rem < x - assert_eq!(sp.end.column - sp.start.column + 1, x); + assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0); spx.push_front(( ( sp.start.line, diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 52cf02f8..191ce8df 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -276,16 +276,12 @@ fn echaw4() { fn echaw5() { assert_ast_match!( [], - // [extension.autolink], "_#___@e.u", (document (1:1-1:9) [ (paragraph (1:1-1:9) [ (emph (1:1-1:3) [ (text (1:2-1:2) "#") ]) - // (link (1:1-1:14) "mailto:-@_.e" [ - // (text (1:1-1:14) "-@_.e") - // ]) (text (1:4-1:9) "__@e.u") ]) ]) @@ -309,3 +305,18 @@ fn echaw6() { ]) ); } + +#[test] +fn echaw7() { + assert_ast_match!( + [extension.autolink], + "Ai@i.a", + (document (1:1-1:10) [ + (paragraph (1:1-1:10) [ + (link (1:1-1:10) "mailto:Ai@i.a" [ + (text (1:1-1:10) "Ai@i.a") + ]) + ]) + ]) + ); +} From c8d935e610bf43fb700cea54d6603e42d1d0805f Mon Sep 17 00:00:00 2001 From: Talya Connor Date: Sun, 2 Mar 2025 13:41:40 +1100 Subject: [PATCH 28/30] tests: add new failure (tasklist). --- src/tests/fuzz.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 191ce8df..b2567dbd 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -320,3 +320,21 @@ fn echaw7() { ]) ); } + +#[test] +fn echaw8() { + // fuzz/artifacts/all_options/minimized-from-57c3eaf5e03b3fd7fa971b0db6143ee3c21a7452 + assert_ast_match!( + [extension.autolink, extension.tasklist], + "- [x] 𝔛- Date: Sun, 2 Mar 2025 13:41:40 +1100 Subject: [PATCH 29/30] parser: correct tasklist spx. 
--- src/parser/autolink.rs | 56 +++-------------------------------- src/parser/mod.rs | 67 +++++++++++++++++++++++++++++++++++++----- src/tests/fuzz.rs | 9 ++++-- 3 files changed, 70 insertions(+), 62 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 72e2e1e3..294d6621 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -1,8 +1,7 @@ use crate::character_set::character_set; use crate::ctype::{isalnum, isalpha, isspace}; use crate::nodes::{AstNode, NodeLink, NodeValue, Sourcepos}; -use crate::parser::inlines::make_inline; -use std::collections::VecDeque; +use crate::parser::{inlines::make_inline, Spx}; use std::str; use typed_arena::Arena; use unicode_categories::UnicodeCategories; @@ -13,7 +12,7 @@ pub(crate) fn process_email_autolinks<'a>( contents_str: &mut String, relaxed_autolinks: bool, sourcepos: &mut Sourcepos, - mut spx: VecDeque<(Sourcepos, usize)>, + spx: &mut Spx, ) { let contents = contents_str.as_bytes(); let len = contents.len(); @@ -64,9 +63,9 @@ pub(crate) fn process_email_autolinks<'a>( }; let initial_end_col = sourcepos.end.column; - sourcepos.end.column = consume_spx(&mut spx, i); + sourcepos.end.column = spx.consume(i); - let nsp_end_col = consume_spx(&mut spx, skip); + let nsp_end_col = spx.consume(skip); contents_str.truncate(i); @@ -112,53 +111,6 @@ pub(crate) fn process_email_autolinks<'a>( } } } - -// Sourcepos end column `e` of the original node (set by writing to -// `*sourcepos`) determined by advancing through `spx` until `i` bytes of input -// are seen. -// -// For each element `(sp, x)` in `spx`: -// - if remaining `i` is greater than the byte count `x`, -// set `i -= x` and continue. -// - if remaining `i` is equal to the byte count `x`, -// set `e = sp.end.column` and finish. -// - if remaining `i` is less than the byte count `x`, -// assert `sp.end.column - sp.start.column + 1 == x || rem == 0` (1), -// set `e = sp.start.column + i - 1` and finish. 
-// -// (1) If `x` doesn't equal the range covered between the start and end column, -// there's no way to determine sourcepos within the range. This is a bug if -// it happens; it suggests we've matched an email autolink with some smart -// punctuation in it, or worse. -// -// The one exception is if `rem == 0`. Given nothing to consume, we can -// happily restore what we popped, returning `sp.start.column - 1` for the -// end column of the original node. -fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize { - while let Some((sp, x)) = spx.pop_front() { - if rem > x { - rem -= x; - } else if rem == x { - return sp.end.column; - } else { - // rem < x - assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0); - spx.push_front(( - ( - sp.start.line, - sp.start.column + rem, - sp.end.line, - sp.end.column, - ) - .into(), - x - rem, - )); - return sp.start.column + rem - 1; - } - } - unreachable!(); -} - fn email_match<'a>( arena: &'a Arena>, contents: &[u8], diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 188ff2a8..1bb6f801 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2961,14 +2961,14 @@ where // Join adjacent text nodes together, then post-process. // Record the original list of sourcepos and bytecounts // for the post-processing step. - let mut spx = VecDeque::new(); - spx.push_back((sourcepos, root.len())); + let mut spxv = VecDeque::new(); + spxv.push_back((sourcepos, root.len())); while let Some(ns) = n.next_sibling() { match ns.data.borrow().value { NodeValue::Text(ref adj) => { root.push_str(adj); let sp = ns.data.borrow().sourcepos; - spx.push_back((sp, adj.len())); + spxv.push_back((sp, adj.len())); sourcepos.end.column = sp.end.column; ns.detach(); } @@ -2976,7 +2976,7 @@ where } } - self.postprocess_text_node(n, root, &mut sourcepos, spx); + self.postprocess_text_node(n, root, &mut sourcepos, spxv); emptied = root.len() == 0; } NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) 
=> { @@ -3011,10 +3011,11 @@ where node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, - spx: VecDeque<(Sourcepos, usize)>, + spxv: VecDeque<(Sourcepos, usize)>, ) { + let mut spx = Spx(spxv); if self.options.extension.tasklist { - self.process_tasklist(node, text, sourcepos); + self.process_tasklist(node, text, sourcepos, &mut spx); } if self.options.extension.autolink { @@ -3024,7 +3025,7 @@ where text, self.options.parse.relaxed_autolinks, sourcepos, - spx, + &mut spx, ); } } @@ -3034,6 +3035,7 @@ where node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, + spx: &mut Spx, ) { let (end, symbol) = match scanners::tasklist(text.as_bytes()) { Some(p) => p, @@ -3071,6 +3073,8 @@ where // the count thereof (i.e. "end") will precisely map to characters in // the source document. sourcepos.start.column += end; + let reference = spx.consume(end) + 1; + assert_eq!(reference, sourcepos.start.column); parent.data.borrow_mut().sourcepos.start.column += end; grandparent.data.borrow_mut().value = @@ -3323,3 +3327,52 @@ pub enum ListStyleType { /// The `*` character Star = 42, } + +pub(crate) struct Spx(VecDeque<(Sourcepos, usize)>); + +impl Spx { + // Sourcepos end column `e` of a node determined by advancing through `spx` + // until `i` bytes of input are seen. + // + // For each element `(sp, x)` in `spx`: + // - if remaining `i` is greater than the byte count `x`, + // set `i -= x` and continue. + // - if remaining `i` is equal to the byte count `x`, + // set `e = sp.end.column` and finish. + // - if remaining `i` is less than the byte count `x`, + // assert `sp.end.column - sp.start.column + 1 == x || i == 0` (1), + // set `e = sp.start.column + i - 1` and finish. + // + // (1) If `x` doesn't equal the range covered between the start and end column, + // there's no way to determine sourcepos within the range. This is a bug if + // it happens; it suggests we've matched an email autolink with some smart + // punctuation in it, or worse. 
+ // + // The one exception is if `i == 0`. Given nothing to consume, we can + // happily restore what we popped, returning `sp.start.column - 1` for the + // end column of the original node. + pub(crate) fn consume(&mut self, mut rem: usize) -> usize { + while let Some((sp, x)) = self.0.pop_front() { + if rem > x { + rem -= x; + } else if rem == x { + return sp.end.column; + } else { + // rem < x + assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0); + self.0.push_front(( + ( + sp.start.line, + sp.start.column + rem, + sp.end.line, + sp.end.column, + ) + .into(), + x - rem, + )); + return sp.start.column + rem - 1; + } + } + unreachable!(); + } +} diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index b2567dbd..b99db429 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -329,9 +329,12 @@ fn echaw8() { "- [x] 𝔛- Date: Mon, 3 Mar 2025 07:37:36 +1100 Subject: [PATCH 30/30] tests: add new failure ( ). --- src/tests/fuzz.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index b99db429..3bd3f5b6 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -341,3 +341,24 @@ fn echaw8() { ]), ); } + +#[test] +fn echaw9() { + // fuzz/artifacts/all_options/minimized-from-8a07a44ba1f971ec39d0c14d377c78c2535c6fd5 + assert_ast_match!( + [extension.autolink, extension.tasklist], + "-\t[ ] ", + (document (1:1-1:15) [ + (list (1:1-1:15) [ + (taskitem (1:1-1:15) [ + (paragraph (1:7-1:17) [ + (text (1:7-1:13) "𝔛-<") + (link (1:14-1:17) "mailto:A@.N" [ + (text (1:14-1:17) "A@.N") + ]) + ]) + ]) + ]) + ]), + ); +}