From c07d2405326000eac1976b1ffad137ec2878b67e Mon Sep 17 00:00:00 2001
From: Jordan Petridis
Date: Thu, 7 Dec 2017 06:38:31 +0200
Subject: [PATCH] Sanitize HTML during feed parsing.
---
Cargo.lock | 22 ++++++++++++++++++++++
hammond-data/Cargo.toml | 3 ++-
hammond-data/src/lib.rs | 1 +
hammond-data/src/parser.rs | 25 +++++++++++++------------
hammond-gtk/src/widgets/episode.rs | 29 ++++++++++++++++++-----------
scripts/release.sh | 2 --
6 files changed, 56 insertions(+), 26 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 280ca0f..f6037b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -20,6 +20,19 @@ dependencies = [
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
+[[package]]
+name = "ammonia"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "html5ever 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
[[package]]
name = "ansi_term"
version = "0.10.2"
@@ -548,6 +561,7 @@ dependencies = [
name = "hammond-data"
version = "0.1.0"
dependencies = [
+ "ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"derive_builder 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diesel 0.99.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -599,6 +613,7 @@ dependencies = [
"loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"open 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -752,6 +767,11 @@ name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
+[[package]]
+name = "maplit"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
[[package]]
name = "markup5ever"
version = "0.6.2"
@@ -1583,6 +1603,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6cbd0b9af8587c72beadc9f72d35b9fbb070982c9e6203e46e93f10df25f8f45"
"checksum advapi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e06588080cb19d0acb6739808aafa5f26bfb2ca015b2b6370028b44cf7cb8a9a"
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
+"checksum ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2cc0ea12b4977283c563e78eaf227b024d89d72a6394040fad4063899bfcfb48"
"checksum ansi_term 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
"checksum antidote 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "34fde25430d87a9388dadbe6e34d7f72a462c8b43ac8d309b42b0a8505d7e2a5"
"checksum atk-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "33a67fd81e1922dddc335887516f2f5254534e89c9d39fa89bca5d79bd150d34"
@@ -1658,6 +1679,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b"
"checksum loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b178879253fab6ddb4ea931e1e6f514d45ce6a53f7fe618a0a8751f43e42e4f1"
"checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+"checksum maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ed95049d40b8a1a7691adbabca028ad481f7e6a2921ce4846e1ee168b4e4ca5"
"checksum markup5ever 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2cf89d3e0486c32c9d99521455ddf9a438910a1ce2bd376936086edc15dff5fc"
"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376"
"checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"
diff --git a/hammond-data/Cargo.toml b/hammond-data/Cargo.toml
index f20c271..34e41c4 100644
--- a/hammond-data/Cargo.toml
+++ b/hammond-data/Cargo.toml
@@ -5,7 +5,9 @@ version = "0.1.0"
workspace = "../"
[dependencies]
+ammonia = "1.0.0"
chrono = "0.4.0"
+derive_builder = "0.5.0"
dotenv = "0.10.1"
error-chain = "0.11.0"
lazy_static = "1.0.0"
@@ -18,7 +20,6 @@ rfc822_sanitizer = "0.3.3"
rss = "1.2.1"
url = "1.6.0"
xdg = "2.1.0"
-derive_builder = "0.5.0"
[dependencies.diesel]
features = ["sqlite"]
diff --git a/hammond-data/src/lib.rs b/hammond-data/src/lib.rs
index 25994a8..b4465f1 100644
--- a/hammond-data/src/lib.rs
+++ b/hammond-data/src/lib.rs
@@ -46,6 +46,7 @@ extern crate rfc822_sanitizer;
extern crate rss;
extern crate url;
extern crate xdg;
+extern crate ammonia;
#[allow(missing_docs)]
pub mod dbqueries;
diff --git a/hammond-data/src/parser.rs b/hammond-data/src/parser.rs
index da04862..2cddc50 100644
--- a/hammond-data/src/parser.rs
+++ b/hammond-data/src/parser.rs
@@ -1,3 +1,4 @@
+use ammonia;
use rss::{Channel, Item};
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
@@ -10,7 +11,7 @@ use errors::*;
/// Parses a `rss::Channel` into a `NewPodcast` Struct.
pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
let title = chan.title().trim();
- let description = chan.description().trim();
+ let description = ammonia::clean(chan.description().trim());
let link = url_cleaner(chan.link());
let x = chan.itunes_ext().map(|s| s.image());
@@ -33,7 +34,7 @@ pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
/// Parses an `rss::Item` into a `NewEpisode` Struct.
pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result {
let title = item.title().map(|s| s.trim().to_owned());
- let description = item.description().map(|s| s.trim().to_owned());
+ let description = item.description().map(|s| ammonia::clean(s.trim()));
let guid = item.guid().map(|s| s.value().trim().to_owned());
// Its kinda weird this being an Option type.
@@ -276,10 +277,10 @@ mod tests {
let channel = Channel::read_from(BufReader::new(file)).unwrap();
let firstitem = channel.items().first().unwrap();
- let descr =
- "Audit your network with a couple of easy commands on Kali Linux. Chris decides to \
- blow off a little steam by attacking his IoT devices, Wes has the scope on Equifax \
- blaming open source & the Beard just saved the show. It’s a really packed episode!";
+ let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \
+ decides to blow off a little steam by attacking his IoT devices, Wes has \
+ the scope on Equifax blaming open source & the Beard just saved the \
+ show. It’s a really packed episode!";
let i = new_episode(&firstitem, 0).unwrap();
assert_eq!(i.title(), Some("Hacking Devices with Kali Linux | LUP 214"));
@@ -301,7 +302,7 @@ mod tests {
decisions for the next version of Gnome have us worried about the \
future.
\n\n
Plus we chat with Wimpy about the Ubuntu Rally in NYC, \
Microsoft’s sneaky move to turn Windows 10 into the “ULTIMATE LINUX \
- RUNTIME”, community news & more!
";
+ RUNTIME”, community news & more!";
assert_eq!(i2.title(), Some("Gnome Does it Again | LUP 213"));
assert_eq!(
i2.uri(),
@@ -321,8 +322,8 @@ mod tests {
let firstitem = channel.items().iter().nth(9).unwrap();
let descr = "This week we look at RFC 2094 \
- \"Non-lexical lifetimes\"";
+ href=\"https://github.com/rust-lang/rfcs/pull/2094\" rel=\"noopener \
+ noreferrer\">RFC 2094 \"Non-lexical lifetimes\"";
let i = new_episode(&firstitem, 0).unwrap();
assert_eq!(i.title(), Some("Episode #9 - A Once in a Lifetime RFC"));
@@ -343,9 +344,9 @@ mod tests {
let second = channel.items().iter().nth(8).unwrap();
let i2 = new_episode(&second, 0).unwrap();
- let descr2 = "This week we look at RFC 2071 \"Add \
- impl Trait type alias and variable declarations\"";
+ let descr2 = "This week we look at RFC 2071 \"Add impl Trait type alias and \
+ variable declarations\"";
assert_eq!(i2.title(), Some("Episode #8 - An Existential Crisis"));
assert_eq!(
i2.uri(),
diff --git a/hammond-gtk/src/widgets/episode.rs b/hammond-gtk/src/widgets/episode.rs
index 4d51d85..4d876df 100644
--- a/hammond-gtk/src/widgets/episode.rs
+++ b/hammond-gtk/src/widgets/episode.rs
@@ -1,23 +1,25 @@
+use glib;
+use gtk;
+use gtk::prelude::*;
+use gtk::{ContainerExt, TextBufferExt};
+
use open;
+use dissolve::strip_html_tags;
+use diesel::associations::Identifiable;
+
use hammond_data::dbqueries;
use hammond_data::{Episode, Podcast};
use hammond_downloader::downloader;
use hammond_data::utils::*;
use hammond_data::errors::*;
-use dissolve::strip_html_tags;
-use diesel::associations::Identifiable;
+// use utils::html_to_markup;
use std::thread;
use std::cell::RefCell;
use std::sync::mpsc::{channel, Receiver};
use std::path::Path;
-use glib;
-use gtk;
-use gtk::prelude::*;
-use gtk::{ContainerExt, TextBufferExt};
-
type Foo = RefCell