Skip to content

Commit

Permalink
Preserve case and ; when replacing url(;base64)
Browse files Browse the repository at this point in the history
Fixes #10
  • Loading branch information
kornelski committed Jan 29, 2025
1 parent 1682759 commit ea7d8ed
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 70 deletions.
112 changes: 47 additions & 65 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,67 +520,47 @@ impl Filter {
/// and enabling filter bypass (e.g. if our tokenizer saw a comment but browsers didn't).
/// Proper implementation needs browser-grade parsers, and that's a big ask for a small tool.
///
/// So here's a brute approach instead: throwing out everything that isn't obviously trivial.
/// We don't really need to support SVG files that rely on weird tricky syntax. We can mangle all
/// the edge cases as long as we're erring on the side of safety.
/// So here's a brute approach instead: replacing everything that could possibly be a `url()`,
/// and throwing away everything that looks tricky to parse.
fn filtered_url_func(&self, css_or_svg: &str) -> String {
// If we see something suspicious we'll try to remove it instead of throwing away the whole stylesheet/attribute.
css_or_svg.split_inclusive([';', '{', '}', ','])
.filter_map(|full_chunk| {
// CSS allows escapes in identifiers! No escape schenanigans allowed, as this makes matching require a proper tokenizer.
if full_chunk.contains('\\') {
// TODO: could perform unescaping instead
return None;
}

// without escapes it's reasonable to search for identifiers now (lowercase to make them case-insensitive)
let chunk_lower = full_chunk.to_ascii_lowercase();
// @rules don't allow extra whitespace, so apart from escapes, they can't be obfuscated
if chunk_lower.contains("@import") {
return None;
}
let mut out = String::with_capacity(css_or_svg.len());

// find next potential start of a URL (case-insensitive)
let mut inside_url = false;
for mut chunk in css_or_svg.split_inclusive('(') {
if inside_url {
// unescaped ( in URLs is not allowed, keep skipping until url( closes
let Some((url, rest)) = chunk.split_once(')') else {
continue;
};
chunk = rest;

let url = url.trim()
.trim_start_matches(['"', '\'']).trim_end_matches(['"', '\''])
// the quoted value allows whitespace too!
.trim();

let url = self.filter_url(url)
// no tricky chars please.
.filter(|url| !url.contains(['(', ')', '\'', '"', '\\', ' ']));
// the url(#) must stay to keep the syntax valid
out.push_str(&url.as_deref().unwrap_or("#"));
out.push(')');
}

// `url()` doesn't allow whitespace before '('
if chunk_lower.contains("url(") {
let mut out = String::with_capacity(chunk_lower.len());
// this is like split(), but with the match done on lowercased string for case-insensitivity. Indices are used to get original case back.
// this works thanks to to_ascii_lowercase preserving offsets.
let mut last_idx = 0;
let mut url_chunks = chunk_lower.match_indices("url(").chain(Some((full_chunk.len(), "")))
.map(|(idx, frag)| {
let s = &full_chunk[last_idx..idx];
last_idx = idx+frag.len();
s
});
out.push_str(url_chunks.next().expect("first")); // text before `url(`
while let Some(url_chunk) = url_chunks.next() {
// Too bad if you use unescaped () in your URL
let mut urlfunc_parts = url_chunk.split(')');
let urlfunc = urlfunc_parts.next().expect("first");
let after_urlfunc = urlfunc_parts.next()?; // if it's none, it's `url(url(`, which we don't want
let url = urlfunc.trim()
// we've already established there are no escape chars in the chunk,
// so there can't be any tricky things inside the string.
// Sorry to everyone who uses unescaped quote chars at the end of their URLs.
.trim_start_matches(['"', '\'']).trim_end_matches(['"', '\''])
// the quoted value allows whitespace too!
.trim();
let url = self.filter_url(url)
// no tricky chars please. For SVG it's important to keep url() even if the URL inside it is bogus
// because otherwise properties like fill would default to black instead of transparent
.filter(|url| !url.contains(['(',')','\'','"','\\']));
let url = url.as_deref().unwrap_or("#");
out.push_str("url(");
out.push_str(url);
out.push(')');
out.push_str(after_urlfunc); // it's important to keep the terminator
for chunk in chunk.split_inclusive([';', '{', '}', ',']) {
if chunk.contains('\\') || (chunk.contains('@') && chunk.to_ascii_lowercase().contains("@import")) {
if let Some(b @ (b'{' | b'}')) = chunk.as_bytes().last().copied() {
out.push(b as char);
}
return Some(Cow::Owned(out));
continue;
}

Some(Cow::Borrowed(full_chunk))
})
.collect()
out.push_str(chunk);
// `url()` doesn't allow whitespace before '('
inside_url = chunk.len() >= 4 && chunk.get(chunk.len() - 4..).is_some_and(|c| c.eq_ignore_ascii_case("url("));
}
}
out
}
}

Expand All @@ -593,28 +573,30 @@ fn urlfunc() {
assert_eq!(f.filtered_url_func("hello, url( http://evil.com )"), "hello, url(#)");
assert_eq!(f.filtered_url_func("hello, url( //evil.com )"), "hello, url(#)");
assert_eq!(f.filtered_url_func("url( /okay ), (), bye"), "url(/okay), (), bye");
assert_eq!(f.filtered_url_func("hello, url( unclosed"), "hello,");
assert_eq!(f.filtered_url_func("hello, url( unclosed"), "hello, url(");
assert_eq!(f.filtered_url_func("hello, url( ) url( ) bork"), "hello, url() url() bork");
assert_eq!(f.filtered_url_func("hello, url('1' )url( 2 ) bork"), "hello, url(1)url(2) bork");
assert_eq!(f.filtered_url_func("hello, url('(' )"), "hello, url(%28)");
assert_eq!(f.filtered_url_func("hello, url('(' )"), "hello, url()");
}

#[test]
fn css() {
let f = Filter::new();
assert_eq!(f.filtered_url_func("color: red; background: URL(X); huh"), "color: red; background: url(X); huh");
assert_eq!(f.filtered_url_func("ok:url(data:text/plain;base64,AAA)"), "ok:url(#)");
assert_eq!(f.filtered_url_func("color: red; background: URL(X); huh"), "color: red; background: URL(X); huh");
assert_eq!(f.filtered_url_func("@import 'foo'; FONT-size: 1em;"), " FONT-size: 1em;");
assert_eq!(f.filtered_url_func("u;url(data:x);rl(hack)"), "u;url(#);rl(hack)");
assert_eq!(f.filtered_url_func("u;\\;rl(hack)"), "u;rl(hack)");
assert_eq!(f.filtered_url_func("u;\\,rl(hack)"), "u;rl(hack)");
assert_eq!(f.filtered_url_func("u;\\url(hack)"), "u;");
assert_eq!(f.filtered_url_func("u;\\url(hack)"), "u;hack)");
assert_eq!(f.filtered_url_func("font-size: 1em; @Import 'foo';"), "font-size: 1em;");
assert_eq!(f.filtered_url_func("@\\69MporT 'foo';"), "");
assert_eq!(f.filtered_url_func("color: red; background: URL( url(); huh)"), "color: red; huh)");
assert_eq!(f.filtered_url_func("color: red; background: URL(//x/rel);"), "color: red; background: url(/rel);");
assert_eq!(f.filtered_url_func("color: red; background: URL(data:xx);"), "color: red; background: url(#);");
assert_eq!(f.filtered_url_func("color: red; background: UR\\L(x); border: blue"), "color: red; border: blue");
assert_eq!(f.filtered_url_func("color: red; background: URL( url(); huh)"), "color: red; background: URL(); huh)");
assert_eq!(f.filtered_url_func("color: red; background: URL(//x/rel);"), "color: red; background: URL(/rel);");
assert_eq!(f.filtered_url_func("color: red; background: URL(data:xx);"), "color: red; background: URL(#);");
assert_eq!(f.filtered_url_func("color: red; background: UR\\L(x); border: blue"), "color: red;x); border: blue");
assert_eq!(f.filtered_url_func("prop: url (it is not);"), "prop: url (it is not);");
assert_eq!(f.filtered_url_func("sel, sel\\2 { bg: url(data:x); ba\\d; }"), "sel,{ bg: url(#); }");
}

#[test]
Expand Down
11 changes: 6 additions & 5 deletions tests/filtered.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@
<rect fill="url(/url.svg#p1)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/url.svg#p2)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/url.svg#p3)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/url.svg#p4)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/url.svg#p5)" height="1000" width="1000" x="0" y="0"/>
<rect fill="" height="5" width="5" x="0" y="0"/>
<rect fill="url(/url.svg%28)#p7&apos;" height="1000" width="10" x="0" y="0"/>
<rect fill="" height="4" width="4" x="0" y="0"/>
<rect fill="URL(/url.svg#p4)" height="1000" width="1000" x="0" y="0"/>
<rect fill="Url(/url.svg#p5)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(#)#p6)" height="5" width="5" x="0" y="0"/>
<rect fill="url()#p7&apos;)" height="1000" width="10" x="0" y="0"/>
<rect fill="&apos;//backslash.invalid/url.svg()#p8&apos;)" height="4" width="4" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(/test.svg)" height="1000" width="1000" x="0" y="0"/>
<rect fill="url(#test.svg)" height="1000" width="1000" x="0" y="0"/>
<style>@font-face {font-family: FontyFont;src: url(#);}</style>

<image href="test.gif" x="333" y="444"/>
<image/>
Expand Down
1 change: 1 addition & 0 deletions tests/test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<rect fill="url('//x.example.com/test.svg')" x="0" y="0" width="1000" height="1000"></rect>
<rect fill="url(&quot;/test.svg&quot;)" x="0" y="0" width="1000" height="1000"></rect>
<rect fill="ur&#108;('#test.svg')" x="0" y="0" width="1000" height="1000"></rect>
<style>@font-face {font-family: FontyFont;src: url(data:font/woff2;base64,d09GMgABAAAAABIkAA4AAAAAIAgAABHRAAEAAAAAAAAAAAA);}</style>
<img rdf:nodeID="abc" xmlns:rdf="urn:the-rdf-uri" />
<image href="test.gif" x="333" y="444"/>
<image src="//x.example.com/" />
Expand Down

0 comments on commit ea7d8ed

Please sign in to comment.