diff --git a/README.md b/README.md
index 31b20670..c07dfbda 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ FLAGS:
         --github-pre-lang    Use GitHub-style <pre lang> for code blocks
         --hardbreaks         Treat newlines as hard line breaks
     -h, --help               Prints help information
+        --safe               Suppress raw HTML and dangerous URLs
         --smart              Use smart punctuation
     -V, --version            Prints version information
 
@@ -113,6 +114,8 @@ assert_eq!(
 
 As with [`cmark-gfm`](https://github.com/github/cmark#security), Comrak will pass through inline HTML, dangerous links, anything you can imagine — it only performs Markdown to HTML conversion per the CommonMark/GFM spec.  We recommend the use of a sanitisation library like [`ammonia`](https://github.com/notriddle/ammonia) configured specific to your needs.
 
+You can also disable this potentially unsafe passthrough of raw HTML and dangerous links by enabling the `safe` option (or passing `--safe` on the command line).
+
 ## Extensions
 
 Comrak supports the five extensions to CommonMark defined in the
diff --git a/src/html.rs b/src/html.rs
index 35fae74d..db7b1f4b 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -2,6 +2,7 @@ use ctype::isspace;
 use nodes::{AstNode, ListType, NodeValue, TableAlignment};
 use parser::ComrakOptions;
 use regex::Regex;
+use scanners;
 use std::borrow::Cow;
 use std::cell::Cell;
 use std::collections::HashSet;
@@ -149,6 +150,10 @@ fn tagfilter_block(input: &[u8], o: &mut Write) -> io::Result<()> {
     Ok(())
 }
 
+fn dangerous_url(input: &[u8]) -> bool {
+    scanners::dangerous_url(input).is_some() // true when the scanner rule reports a match in `input`
+}
+
 impl<'o> HtmlFormatter<'o> {
     fn new(options: &'o ComrakOptions, output: &'o mut WriteWithLast<'o>) -> Self {
         HtmlFormatter {
@@ -416,7 +421,9 @@ impl<'o> HtmlFormatter<'o> {
             },
             NodeValue::HtmlBlock(ref nhb) => if entering {
                 try!(self.cr());
-                if self.options.ext_tagfilter {
+                if self.options.safe {
+                    try!(self.output.write_all(b""));
+                } else if self.options.ext_tagfilter {
                     try!(tagfilter_block(&nhb.literal, &mut self.output));
                 } else {
                     try!(self.output.write_all(&nhb.literal));
@@ -472,7 +479,9 @@ impl<'o> HtmlFormatter<'o> {
                 try!(self.output.write_all(b""));
             },
             NodeValue::HtmlInline(ref literal) => if entering {
-                if self.options.ext_tagfilter && tagfilter(literal) {
+                if self.options.safe {
+                    try!(self.output.write_all(b""));
+                } else if self.options.ext_tagfilter && tagfilter(literal) {
                     try!(self.output.write_all(b"<"));
                     try!(self.output.write_all(&literal[1..]));
                 } else {
@@ -501,7 +510,9 @@ impl<'o> HtmlFormatter<'o> {
             },
             NodeValue::Link(ref nl) => if entering {
                 try!(self.output.write_all(b" HtmlFormatter<'o> {
             },
             NodeValue::Image(ref nl) => if entering {
                 try!(self.output.write_all(b"\""));,
 
+    /// Disable rendering of raw HTML and potentially dangerous links.
+    ///
+    /// ```
+    /// # use comrak::{markdown_to_html, ComrakOptions};
+    /// let mut options = ComrakOptions::default();
+    /// let input = "\n\n\
+    ///              Possibly annoying.\n\n\
+    ///              [Dangerous](javascript:alert(document.cookie)).\n\n\
+    ///              [Safe](http://commonmark.org).\n";
+    ///
+    /// assert_eq!(markdown_to_html(input, &options),
+    ///            "\n\
+    ///             

Possibly annoying.

\n\ + ///

Dangerous.

\n\ + ///

Safe.

\n"); + /// + /// options.safe = true; + /// assert_eq!(markdown_to_html(input, &options), + /// "\n\ + ///

Possibly annoying.

\n\ + ///

Dangerous.

\n\ + ///

Safe.

\n"); + /// ``` + pub safe: bool, + /// Enables the /// [strikethrough extension](https://github.github.com/gfm/#strikethrough-extension-) /// from the GFM spec. diff --git a/src/scanners.rs b/src/scanners.rs index 48982ec4..ef37976e 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -178,3 +178,8 @@ pub fn table_cell_end(line: &[u8]) -> Option { pub fn table_row_end(line: &[u8]) -> Option { search(Rule::table_row_end, line) } + +#[inline(always)] +pub fn dangerous_url(line: &[u8]) -> Option { + search(Rule::dangerous_url, line) +} diff --git a/src/tests.rs b/src/tests.rs index 590919c7..c395ea75 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -821,3 +821,30 @@ fn cm_autolink_regression() { // Testing that the cm renderer handles this case without crashing html("", "

a+c:dd

\n"); } + +#[test] +fn safe() { + html_opts( + concat!( + "[data:png](data:png/x)\n\n", + "[data:gif](data:gif/x)\n\n", + "[data:jpeg](data:jpeg/x)\n\n", + "[data:webp](data:webp/x)\n\n", + "[data:malicious](data:malicious/x)\n\n", + "[javascript:malicious](javascript:malicious)\n\n", + "[vbscript:malicious](vbscript:malicious)\n\n", + "[file:malicious](file:malicious)\n\n", + ), + concat!( + "

data:png

\n", + "

data:gif

\n", + "

data:jpeg

\n", + "

data:webp

\n", + "

data:malicious

\n", + "

javascript:malicious

\n", + "

vbscript:malicious

\n", + "

file:malicious

\n", + ), + |opts| opts.safe = true, + ) +}