diff --git a/examples/iterator_replace.rs b/examples/iterator_replace.rs index 24d13c80..6006a4d5 100644 --- a/examples/iterator_replace.rs +++ b/examples/iterator_replace.rs @@ -27,7 +27,7 @@ fn main() { let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n"; let orig = "my"; let repl = "your"; - let html = replace_text(&doc, &orig, &repl); + let html = replace_text(doc, orig, repl); println!("{}", html); } diff --git a/examples/sample.rs b/examples/sample.rs index d3e5141f..8c30bcc3 100644 --- a/examples/sample.rs +++ b/examples/sample.rs @@ -38,7 +38,7 @@ fn large() { let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n"; let orig = "my"; let repl = "your"; - let html = replace_text(&doc, &orig, &repl); + let html = replace_text(doc, orig, repl); println!("{}", html); // Output: diff --git a/examples/update-readme.rs b/examples/update-readme.rs index c683a8f3..37e70ccf 100644 --- a/examples/update-readme.rs +++ b/examples/update-readme.rs @@ -49,7 +49,7 @@ fn main() -> Result<(), Box> { if next_block_is_help_body { next_block_is_help_body = false; - assert!(ncb.info == "" && ncb.literal.starts_with(HELP_START)); + assert!(ncb.info.is_empty() && ncb.literal.starts_with(HELP_START)); let mut content = String::new(); let mut cmd = std::process::Command::new("cargo"); content.push_str( diff --git a/flake.nix b/flake.nix index 4d531c1a..4a1d797f 100644 --- a/flake.nix +++ b/flake.nix @@ -121,6 +121,7 @@ "cargo" "rustc" "rust-analyzer" + "clippy" ]) pkgs.cargo-fuzz pkgs.python3 diff --git a/src/cm.rs b/src/cm.rs index 545a9666..dd753f8a 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -10,9 +10,12 @@ use crate::parser::{Options, WikiLinksMode}; use crate::scanners; use crate::strings::trim_start_match; use crate::{nodes, Plugins}; +pub use typed_arena::Arena; use std::cmp::max; +use std::io::BufWriter; use std::io::{self, Write}; +use std::str; /// Formats an AST as CommonMark, modified by the given options. 
pub fn format_document<'a>(
@@ -40,10 +43,14 @@ pub fn format_document_with_plugins<'a>(
 ) -> io::Result<()> {
     let mut f = CommonMarkFormatter::new(root, options);
     f.format(root);
-    if !f.v.is_empty() && f.v[f.v.len() - 1] != b'\n' {
-        f.v.push(b'\n');
+    let mut result = f.v;
+    if !result.is_empty() && result[result.len() - 1] != b'\n' {
+        result.push(b'\n');
     }
-    output.write_all(&f.v)?;
+    if options.render.experimental_minimize_commonmark {
+        minimize_commonmark(&mut result, options);
+    }
+    output.write_all(&result)?;
     Ok(())
 }

@@ -1001,3 +1008,44 @@ fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool {
         _ => c == b'|',
     }
 }
+
+/// Strips backslash escapes from formatted CommonMark `text` wherever the
+/// document still round-trips without them.
+///
+/// Each backslash is removed in turn; the candidate is reparsed and
+/// reformatted with minimization switched off, and the removal is kept only
+/// if that reproduces the original, fully-escaped text byte for byte.
+/// Every candidate costs a full parse and format.
+fn minimize_commonmark(text: &mut Vec<u8>, original_options: &Options) {
+    // Inner passes run with minimization off — presumably so the nested
+    // `format_document` call cannot re-enter this function (TODO confirm).
+    let mut options_without = original_options.clone();
+    options_without.render.experimental_minimize_commonmark = false;
+
+    let ixs: Vec<usize> = text
+        .iter()
+        .enumerate()
+        .filter_map(|(ix, &c)| if c == b'\\' { Some(ix) } else { None })
+        .collect();
+    let original = text.clone();
+
+    // Number of backslashes removed so far; later indices shift left by this.
+    let mut adjust = 0;
+    for ix in ixs {
+        text.remove(ix - adjust);
+
+        let arena = Arena::new();
+        let root = crate::parse_document(&arena, str::from_utf8(text).unwrap(), &options_without);
+
+        let mut bw = BufWriter::new(Vec::new());
+        format_document(root, &options_without, &mut bw).unwrap();
+        let result = bw.into_inner().unwrap();
+
+        if original == result {
+            adjust += 1;
+        } else {
+            // Dropping this escape changed the document; put it back.
+            text.insert(ix - adjust, b'\\');
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 43698ef7..5d90b558 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -156,6 +156,10 @@ struct Cli {
     /// Ignore empty links
     #[arg(long)]
     ignore_empty_links: bool,
+
+    /// Minimize escapes in CommonMark output using a trial-and-error algorithm.
+ #[arg(long)] + experimental_minimize_commonmark: bool, } #[derive(Clone, Copy, Debug, ValueEnum)] @@ -297,6 +301,7 @@ fn main() -> Result<(), Box> { .list_style(cli.list_style.into()) .sourcepos(cli.sourcepos) .experimental_inline_sourcepos(cli.experimental_inline_sourcepos) + .experimental_minimize_commonmark(cli.experimental_minimize_commonmark) .escaped_char_spans(cli.escaped_char_spans) .ignore_setext(cli.ignore_setext) .ignore_empty_links(cli.ignore_empty_links) @@ -364,7 +369,7 @@ fn main() -> Result<(), Box> { formatter(root, &options, &mut bw, &plugins)?; bw.flush()?; } else if cli.inplace { - let output_filename = cli.files.unwrap().get(0).unwrap().clone(); + let output_filename = cli.files.unwrap().first().unwrap().clone(); let mut bw = BufWriter::new(fs::File::create(output_filename)?); formatter(root, &options, &mut bw, &plugins)?; bw.flush()?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 07369bad..1e803383 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1083,6 +1083,26 @@ pub struct RenderOptions { /// ``` #[cfg_attr(feature = "bon", builder(default))] pub ol_width: usize, + + /// Minimise escapes used in CommonMark output (`-t commonmark`) by removing + /// each individually and seeing if the resulting document roundtrips. + /// Brute-force and expensive, but produces nicer output. Note that the + /// result may not in fact be minimal. 
+ /// + /// ```rust + /// # use comrak::{markdown_to_commonmark, Options}; + /// let mut options = Options::default(); + /// let input = "__hi"; + /// + /// assert_eq!(markdown_to_commonmark(input, &options), + /// "\\_\\_hi\n"); + /// + /// options.render.experimental_minimize_commonmark = true; + /// assert_eq!(markdown_to_commonmark(input, &options), + /// "__hi\n"); + /// ``` + #[cfg_attr(feature = "bon", builder(default))] + pub experimental_minimize_commonmark: bool, } #[derive(Default, Debug, Clone)] diff --git a/src/tests/front_matter.rs b/src/tests/front_matter.rs index d3146693..60580eb5 100644 --- a/src/tests/front_matter.rs +++ b/src/tests/front_matter.rs @@ -10,7 +10,7 @@ fn round_trip_one_field() { let input = "---\nlayout: post\n---\nText\n"; let root = parse_document(&arena, input, &options); let mut buf = Vec::new(); - format_commonmark(&root, &options, &mut buf).unwrap(); + format_commonmark(root, &options, &mut buf).unwrap(); assert_eq!(&String::from_utf8(buf).unwrap(), input); } @@ -22,7 +22,7 @@ fn round_trip_wide_delimiter() { let input = "\u{04fc}\nlayout: post\n\u{04fc}\nText\n"; let root = parse_document(&arena, input, &options); let mut buf = Vec::new(); - format_commonmark(&root, &options, &mut buf).unwrap(); + format_commonmark(root, &options, &mut buf).unwrap(); assert_eq!(&String::from_utf8(buf).unwrap(), input); } @@ -106,8 +106,7 @@ fn trailing_space_open() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_none(), "no FrontMatter expected"); } @@ -123,8 +122,7 @@ fn leading_space_open() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_none(), "no FrontMatter expected"); } @@ -140,8 +138,7 @@ fn leading_space_close() 
{ let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_none(), "no FrontMatter expected"); } @@ -157,8 +154,7 @@ fn trailing_space_close() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_none(), "no FrontMatter expected"); } @@ -174,8 +170,7 @@ fn second_line() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_none(), "no FrontMatter expected"); } @@ -191,8 +186,7 @@ fn fm_only_with_trailing_newline() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_some(), "front matter expected"); } @@ -208,8 +202,7 @@ fn fm_only_without_trailing_newline() { let found = root .descendants() - .filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))) - .next(); + .find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..))); assert!(found.is_some(), "front matter expected"); }