Skip to content

Commit

Permalink
Merge pull request #523 from kivikakk/lotte-minimize-cm
Browse files Browse the repository at this point in the history
commonmark: experimental minimize
  • Loading branch information
charlottia authored Feb 21, 2025
2 parents 7dec387 + e245220 commit 7ce7578
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 23 deletions.
2 changes: 1 addition & 1 deletion examples/iterator_replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() {
let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n";
let orig = "my";
let repl = "your";
let html = replace_text(&doc, &orig, &repl);
let html = replace_text(doc, orig, repl);

println!("{}", html);
}
Expand Down
2 changes: 1 addition & 1 deletion examples/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fn large() {
let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n";
let orig = "my";
let repl = "your";
let html = replace_text(&doc, &orig, &repl);
let html = replace_text(doc, orig, repl);

println!("{}", html);
// Output:
Expand Down
2 changes: 1 addition & 1 deletion examples/update-readme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn main() -> Result<(), Box<dyn Error>> {

if next_block_is_help_body {
next_block_is_help_body = false;
assert!(ncb.info == "" && ncb.literal.starts_with(HELP_START));
assert!(ncb.info.is_empty() && ncb.literal.starts_with(HELP_START));
let mut content = String::new();
let mut cmd = std::process::Command::new("cargo");
content.push_str(
Expand Down
1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"cargo"
"rustc"
"rust-analyzer"
"clippy"
])
pkgs.cargo-fuzz
pkgs.python3
Expand Down
43 changes: 40 additions & 3 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ use crate::parser::{Options, WikiLinksMode};
use crate::scanners;
use crate::strings::trim_start_match;
use crate::{nodes, Plugins};
pub use typed_arena::Arena;

use std::cmp::max;
use std::io::BufWriter;
use std::io::{self, Write};
use std::str;

/// Formats an AST as CommonMark, modified by the given options.
pub fn format_document<'a>(
Expand Down Expand Up @@ -40,10 +43,14 @@ pub fn format_document_with_plugins<'a>(
) -> io::Result<()> {
let mut f = CommonMarkFormatter::new(root, options);
f.format(root);
if !f.v.is_empty() && f.v[f.v.len() - 1] != b'\n' {
f.v.push(b'\n');
let mut result = f.v;
if !result.is_empty() && result[result.len() - 1] != b'\n' {
result.push(b'\n');
}
output.write_all(&f.v)?;
if options.render.experimental_minimize_commonmark {
minimize_commonmark(&mut result, options);
}
output.write_all(&result)?;
Ok(())
}

Expand Down Expand Up @@ -1001,3 +1008,33 @@ fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool {
_ => c == b'|',
}
}

/// Removes backslash escapes from rendered CommonMark wherever doing so does
/// not change how the document re-renders.
///
/// Every backslash byte in `text` is tried once, in order: it is deleted, the
/// shortened text is parsed and formatted again, and the deletion is kept only
/// if that output is byte-for-byte equal to `text` as it was on entry.
/// Otherwise the backslash is put back. Each backslash is judged on its own in
/// a single pass, so the result is not guaranteed to be the smallest possible.
///
/// `original_options` is cloned with `experimental_minimize_commonmark`
/// switched off for the trial renders.
///
/// # Panics
///
/// Panics if `text` is not valid UTF-8, or if formatting into the in-memory
/// buffer reports an error.
fn minimize_commonmark(text: &mut Vec<u8>, original_options: &Options) {
    // Trial renders use a copy of the options with the minimize flag cleared.
    let mut plain_options = original_options.clone();
    plain_options.render.experimental_minimize_commonmark = false;

    // Snapshot of the un-minimized output; every trial is compared to this.
    let baseline = text.clone();
    let backslash_positions: Vec<usize> = baseline
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'\\')
        .map(|(pos, _)| pos)
        .collect();

    // Positions were recorded against the untouched text, so each one shifts
    // left by the number of backslashes already dropped before it.
    let mut removed = 0;
    for pos in backslash_positions {
        let at = pos - removed;
        text.remove(at);

        let arena = Arena::new();
        let root = crate::parse_document(&arena, str::from_utf8(text).unwrap(), &plain_options);

        let mut sink = BufWriter::new(Vec::new());
        format_document(root, &plain_options, &mut sink).unwrap();
        let rerendered = sink.into_inner().unwrap();

        if rerendered != baseline {
            // The escape was load-bearing: restore it at the same spot.
            text.insert(at, b'\\');
        } else {
            removed += 1;
        }
    }
}
7 changes: 6 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ struct Cli {
/// Ignore empty links
#[arg(long)]
ignore_empty_links: bool,

/// Minimize escapes in CommonMark output using a trial-and-error algorithm.
#[arg(long)]
experimental_minimize_commonmark: bool,
}

#[derive(Clone, Copy, Debug, ValueEnum)]
Expand Down Expand Up @@ -297,6 +301,7 @@ fn main() -> Result<(), Box<dyn Error>> {
.list_style(cli.list_style.into())
.sourcepos(cli.sourcepos)
.experimental_inline_sourcepos(cli.experimental_inline_sourcepos)
.experimental_minimize_commonmark(cli.experimental_minimize_commonmark)
.escaped_char_spans(cli.escaped_char_spans)
.ignore_setext(cli.ignore_setext)
.ignore_empty_links(cli.ignore_empty_links)
Expand Down Expand Up @@ -364,7 +369,7 @@ fn main() -> Result<(), Box<dyn Error>> {
formatter(root, &options, &mut bw, &plugins)?;
bw.flush()?;
} else if cli.inplace {
let output_filename = cli.files.unwrap().get(0).unwrap().clone();
let output_filename = cli.files.unwrap().first().unwrap().clone();
let mut bw = BufWriter::new(fs::File::create(output_filename)?);
formatter(root, &options, &mut bw, &plugins)?;
bw.flush()?;
Expand Down
20 changes: 20 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,26 @@ pub struct RenderOptions {
/// ```
#[cfg_attr(feature = "bon", builder(default))]
pub ol_width: usize,

/// Minimise escapes used in CommonMark output (`-t commonmark`) by removing
/// each individually and seeing if the resulting document roundtrips.
/// Brute-force and expensive, but produces nicer output. Note that the
/// result may not in fact be minimal.
///
/// ```rust
/// # use comrak::{markdown_to_commonmark, Options};
/// let mut options = Options::default();
/// let input = "__hi";
///
/// assert_eq!(markdown_to_commonmark(input, &options),
/// "\\_\\_hi\n");
///
/// options.render.experimental_minimize_commonmark = true;
/// assert_eq!(markdown_to_commonmark(input, &options),
/// "__hi\n");
/// ```
#[cfg_attr(feature = "bon", builder(default))]
pub experimental_minimize_commonmark: bool,
}

#[derive(Default, Debug, Clone)]
Expand Down
25 changes: 9 additions & 16 deletions src/tests/front_matter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn round_trip_one_field() {
let input = "---\nlayout: post\n---\nText\n";
let root = parse_document(&arena, input, &options);
let mut buf = Vec::new();
format_commonmark(&root, &options, &mut buf).unwrap();
format_commonmark(root, &options, &mut buf).unwrap();
assert_eq!(&String::from_utf8(buf).unwrap(), input);
}

Expand All @@ -22,7 +22,7 @@ fn round_trip_wide_delimiter() {
let input = "\u{04fc}\nlayout: post\n\u{04fc}\nText\n";
let root = parse_document(&arena, input, &options);
let mut buf = Vec::new();
format_commonmark(&root, &options, &mut buf).unwrap();
format_commonmark(root, &options, &mut buf).unwrap();
assert_eq!(&String::from_utf8(buf).unwrap(), input);
}

Expand Down Expand Up @@ -106,8 +106,7 @@ fn trailing_space_open() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_none(), "no FrontMatter expected");
}
Expand All @@ -123,8 +122,7 @@ fn leading_space_open() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_none(), "no FrontMatter expected");
}
Expand All @@ -140,8 +138,7 @@ fn leading_space_close() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_none(), "no FrontMatter expected");
}
Expand All @@ -157,8 +154,7 @@ fn trailing_space_close() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_none(), "no FrontMatter expected");
}
Expand All @@ -174,8 +170,7 @@ fn second_line() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_none(), "no FrontMatter expected");
}
Expand All @@ -191,8 +186,7 @@ fn fm_only_with_trailing_newline() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_some(), "front matter expected");
}
Expand All @@ -208,8 +202,7 @@ fn fm_only_without_trailing_newline() {

let found = root
.descendants()
.filter(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)))
.next();
.find(|n| matches!(n.data.borrow().value, NodeValue::FrontMatter(..)));

assert!(found.is_some(), "front matter expected");
}

0 comments on commit 7ce7578

Please sign in to comment.