Skip to content

Commit

Permalink
Merge pull request #67 from kivikakk/safe
Browse files Browse the repository at this point in the history
`safe` / --safe
  • Loading branch information
Ashe Connor authored May 7, 2018
2 parents d833b6c + 4020ac8 commit 27ffdff
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 4 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ FLAGS:
--github-pre-lang Use GitHub-style <pre lang> for code blocks
--hardbreaks Treat newlines as hard line breaks
-h, --help Prints help information
--safe Suppress raw HTML and dangerous URLs
--smart Use smart punctuation
-V, --version Prints version information
Expand Down Expand Up @@ -113,6 +114,8 @@ assert_eq!(

As with [`cmark-gfm`](https://github.com/github/cmark#security), Comrak will pass through inline HTML, dangerous links, anything you can imagine — it only performs Markdown to HTML conversion per the CommonMark/GFM spec. We recommend the use of a sanitisation library like [`ammonia`](https://github.com/notriddle/ammonia) configured specific to your needs.

You can also disable this potentially unsafe feature by using the `safe` option (or `--safe` at the command-line).

## Extensions

Comrak supports the five extensions to CommonMark defined in the
Expand Down
21 changes: 17 additions & 4 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use ctype::isspace;
use nodes::{AstNode, ListType, NodeValue, TableAlignment};
use parser::ComrakOptions;
use regex::Regex;
use scanners;
use std::borrow::Cow;
use std::cell::Cell;
use std::collections::HashSet;
Expand Down Expand Up @@ -149,6 +150,10 @@ fn tagfilter_block(input: &[u8], o: &mut Write) -> io::Result<()> {
Ok(())
}

fn dangerous_url(input: &[u8]) -> bool {
scanners::dangerous_url(input).is_some()
}

impl<'o> HtmlFormatter<'o> {
fn new(options: &'o ComrakOptions, output: &'o mut WriteWithLast<'o>) -> Self {
HtmlFormatter {
Expand Down Expand Up @@ -416,7 +421,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::HtmlBlock(ref nhb) => if entering {
try!(self.cr());
if self.options.ext_tagfilter {
if self.options.safe {
try!(self.output.write_all(b"<!-- raw HTML omitted -->"));
} else if self.options.ext_tagfilter {
try!(tagfilter_block(&nhb.literal, &mut self.output));
} else {
try!(self.output.write_all(&nhb.literal));
Expand Down Expand Up @@ -472,7 +479,9 @@ impl<'o> HtmlFormatter<'o> {
try!(self.output.write_all(b"</code>"));
},
NodeValue::HtmlInline(ref literal) => if entering {
if self.options.ext_tagfilter && tagfilter(literal) {
if self.options.safe {
try!(self.output.write_all(b"<!-- raw HTML omitted -->"));
} else if self.options.ext_tagfilter && tagfilter(literal) {
try!(self.output.write_all(b"&lt;"));
try!(self.output.write_all(&literal[1..]));
} else {
Expand Down Expand Up @@ -501,7 +510,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::Link(ref nl) => if entering {
try!(self.output.write_all(b"<a href=\""));
try!(self.escape_href(&nl.url));
if !self.options.safe || !dangerous_url(&nl.url) {
try!(self.escape_href(&nl.url));
}
if !nl.title.is_empty() {
try!(self.output.write_all(b"\" title=\""));
try!(self.escape(&nl.title));
Expand All @@ -512,7 +523,9 @@ impl<'o> HtmlFormatter<'o> {
},
NodeValue::Image(ref nl) => if entering {
try!(self.output.write_all(b"<img src=\""));
try!(self.escape_href(&nl.url));
if !self.options.safe || !dangerous_url(&nl.url) {
try!(self.escape_href(&nl.url));
}
try!(self.output.write_all(b"\" alt=\""));
return Ok(true);
} else {
Expand Down
2 changes: 2 additions & 0 deletions src/lexer.pest
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,5 @@ table_cell = { ( escaped_char | !("|" | "\r" | "\n") ~ any)* }
table_start = { "|"? ~ table_marker ~ ("|" ~ table_marker)* ~ "|"? ~ table_spacechar* ~ table_newline }
table_cell_end = { "|" ~ table_spacechar* ~ table_newline? }
table_row_end = { table_spacechar* ~ table_newline }

dangerous_url = { "data:" ~ !("png" | "gif" | "jpeg" | "webp") | "javascript:" | "vbscript:" | "file:" }
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ fn main() {
.value_name("INFO")
.takes_value(true),
)
.arg(
clap::Arg::with_name("safe")
.long("safe")
.help("Suppress raw HTML and dangerous URLs"),
)
.arg(
clap::Arg::with_name("extension")
.short("e")
Expand Down Expand Up @@ -132,6 +137,7 @@ fn main() {
default_info_string: matches
.value_of("default-info-string")
.map(|e| e.to_owned()),
safe: matches.is_present("safe"),
ext_strikethrough: exts.remove("strikethrough"),
ext_tagfilter: exts.remove("tagfilter"),
ext_table: exts.remove("table"),
Expand Down
25 changes: 25 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,31 @@ pub struct ComrakOptions {
/// ```
pub default_info_string: Option<String>,

/// Disable rendering of raw HTML and potentially dangerous links.
///
/// ```
/// # use comrak::{markdown_to_html, ComrakOptions};
/// let mut options = ComrakOptions::default();
/// let input = "<script>\nalert('xyz');\n</script>\n\n\
/// Possibly <marquee>annoying</marquee>.\n\n\
/// [Dangerous](javascript:alert(document.cookie)).\n\n\
/// [Safe](http://commonmark.org).\n";
///
/// assert_eq!(markdown_to_html(input, &options),
/// "<script>\nalert(\'xyz\');\n</script>\n\
/// <p>Possibly <marquee>annoying</marquee>.</p>\n\
/// <p><a href=\"javascript:alert(document.cookie)\">Dangerous</a>.</p>\n\
/// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
///
/// options.safe = true;
/// assert_eq!(markdown_to_html(input, &options),
/// "<!-- raw HTML omitted -->\n\
/// <p>Possibly <!-- raw HTML omitted -->annoying<!-- raw HTML omitted -->.</p>\n\
/// <p><a href=\"\">Dangerous</a>.</p>\n\
/// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
/// ```
pub safe: bool,

/// Enables the
/// [strikethrough extension](https://github.github.com/gfm/#strikethrough-extension-)
/// from the GFM spec.
Expand Down
5 changes: 5 additions & 0 deletions src/scanners.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,8 @@ pub fn table_cell_end(line: &[u8]) -> Option<usize> {
pub fn table_row_end(line: &[u8]) -> Option<usize> {
search(Rule::table_row_end, line)
}

#[inline(always)]
pub fn dangerous_url(line: &[u8]) -> Option<usize> {
search(Rule::dangerous_url, line)
}
27 changes: 27 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,30 @@ fn cm_autolink_regression() {
// Testing that the cm renderer handles this case without crashing
html("<a+c:dd>", "<p><a href=\"a+c:dd\">a+c:dd</a></p>\n");
}

#[test]
fn safe() {
html_opts(
concat!(
"[data:png](data:png/x)\n\n",
"[data:gif](data:gif/x)\n\n",
"[data:jpeg](data:jpeg/x)\n\n",
"[data:webp](data:webp/x)\n\n",
"[data:malicious](data:malicious/x)\n\n",
"[javascript:malicious](javascript:malicious)\n\n",
"[vbscript:malicious](vbscript:malicious)\n\n",
"[file:malicious](file:malicious)\n\n",
),
concat!(
"<p><a href=\"data:png/x\">data:png</a></p>\n",
"<p><a href=\"data:gif/x\">data:gif</a></p>\n",
"<p><a href=\"data:jpeg/x\">data:jpeg</a></p>\n",
"<p><a href=\"data:webp/x\">data:webp</a></p>\n",
"<p><a href=\"\">data:malicious</a></p>\n",
"<p><a href=\"\">javascript:malicious</a></p>\n",
"<p><a href=\"\">vbscript:malicious</a></p>\n",
"<p><a href=\"\">file:malicious</a></p>\n",
),
|opts| opts.safe = true,
)
}

0 comments on commit 27ffdff

Please sign in to comment.