From c07d2405326000eac1976b1ffad137ec2878b67e Mon Sep 17 00:00:00 2001 From: Jordan Petridis Date: Thu, 7 Dec 2017 06:38:31 +0200 Subject: [PATCH] Sanitize html during feed parsing. --- Cargo.lock | 22 ++++++++++++++++++++++ hammond-data/Cargo.toml | 3 ++- hammond-data/src/lib.rs | 1 + hammond-data/src/parser.rs | 25 +++++++++++++------------ hammond-gtk/src/widgets/episode.rs | 29 ++++++++++++++++++----------- scripts/release.sh | 2 -- 6 files changed, 56 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 280ca0f..f6037b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,19 @@ dependencies = [ "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ammonia" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "html5ever 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ansi_term" version = "0.10.2" @@ -548,6 +561,7 @@ dependencies = [ name = "hammond-data" version = "0.1.0" dependencies = [ + "ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "derive_builder 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "diesel 0.99.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -599,6 +613,7 @@ dependencies = [ "loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", "open 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -752,6 +767,11 @@ name = "mac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "maplit" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "markup5ever" version = "0.6.2" @@ -1583,6 +1603,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6cbd0b9af8587c72beadc9f72d35b9fbb070982c9e6203e46e93f10df25f8f45" "checksum advapi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e06588080cb19d0acb6739808aafa5f26bfb2ca015b2b6370028b44cf7cb8a9a" "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4" +"checksum ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2cc0ea12b4977283c563e78eaf227b024d89d72a6394040fad4063899bfcfb48" "checksum ansi_term 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455" "checksum antidote 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "34fde25430d87a9388dadbe6e34d7f72a462c8b43ac8d309b42b0a8505d7e2a5" "checksum atk-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "33a67fd81e1922dddc335887516f2f5254534e89c9d39fa89bca5d79bd150d34" @@ -1658,6 +1679,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b" "checksum loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b178879253fab6ddb4ea931e1e6f514d45ce6a53f7fe618a0a8751f43e42e4f1" "checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +"checksum maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ed95049d40b8a1a7691adbabca028ad481f7e6a2921ce4846e1ee168b4e4ca5" "checksum markup5ever 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2cf89d3e0486c32c9d99521455ddf9a438910a1ce2bd376936086edc15dff5fc" "checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376" "checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d" diff --git a/hammond-data/Cargo.toml b/hammond-data/Cargo.toml index f20c271..34e41c4 100644 --- a/hammond-data/Cargo.toml +++ b/hammond-data/Cargo.toml @@ -5,7 +5,9 @@ version = "0.1.0" workspace = "../" [dependencies] +ammonia = "1.0.0" chrono = "0.4.0" +derive_builder = "0.5.0" dotenv = "0.10.1" error-chain = "0.11.0" lazy_static = "1.0.0" @@ -18,7 +20,6 @@ rfc822_sanitizer = "0.3.3" rss = "1.2.1" url = "1.6.0" xdg = "2.1.0" -derive_builder = "0.5.0" [dependencies.diesel] features = ["sqlite"] diff --git a/hammond-data/src/lib.rs b/hammond-data/src/lib.rs index 25994a8..b4465f1 100644 --- a/hammond-data/src/lib.rs +++ b/hammond-data/src/lib.rs @@ -46,6 +46,7 @@ extern crate rfc822_sanitizer; extern crate rss; extern crate url; extern crate xdg; +extern crate ammonia; #[allow(missing_docs)] pub mod dbqueries; diff --git a/hammond-data/src/parser.rs b/hammond-data/src/parser.rs index da04862..2cddc50 100644 --- a/hammond-data/src/parser.rs +++ b/hammond-data/src/parser.rs @@ -1,3 +1,4 @@ +use ammonia; use rss::{Channel, Item}; use rfc822_sanitizer::parse_from_rfc2822_with_fallback; @@ -10,7 +11,7 @@ use errors::*; /// Parses a `rss::Channel` into a `NewPodcast` Struct. pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast { let title = chan.title().trim(); - let description = chan.description().trim(); + let description = ammonia::clean(chan.description().trim()); let link = url_cleaner(chan.link()); let x = chan.itunes_ext().map(|s| s.image()); @@ -33,7 +34,7 @@ pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast { /// Parses an `rss::Item` into a `NewEpisode` Struct. pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result { let title = item.title().map(|s| s.trim().to_owned()); - let description = item.description().map(|s| s.trim().to_owned()); + let description = item.description().map(|s| ammonia::clean(s.trim())); let guid = item.guid().map(|s| s.value().trim().to_owned()); // Its kinda weird this being an Option type. @@ -276,10 +277,10 @@ mod tests { let channel = Channel::read_from(BufReader::new(file)).unwrap(); let firstitem = channel.items().first().unwrap(); - let descr = - "Audit your network with a couple of easy commands on Kali Linux. Chris decides to \ - blow off a little steam by attacking his IoT devices, Wes has the scope on Equifax \ - blaming open source & the Beard just saved the show. It’s a really packed episode!"; + let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \ + decides to blow off a little steam by attacking his IoT devices, Wes has \ + the scope on Equifax blaming open source & the Beard just saved the \ + show. It’s a really packed episode!"; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!(i.title(), Some("Hacking Devices with Kali Linux | LUP 214")); @@ -301,7 +302,7 @@ mod tests { decisions for the next version of Gnome have us worried about the \ future.

\n\n

Plus we chat with Wimpy about the Ubuntu Rally in NYC, \ Microsoft’s sneaky move to turn Windows 10 into the “ULTIMATE LINUX \ - RUNTIME”, community news & more!

"; + RUNTIME”, community news & more!

"; assert_eq!(i2.title(), Some("Gnome Does it Again | LUP 213")); assert_eq!( i2.uri(), @@ -321,8 +322,8 @@ mod tests { let firstitem = channel.items().iter().nth(9).unwrap(); let descr = "This week we look at RFC 2094 \ - \"Non-lexical lifetimes\""; + href=\"https://github.com/rust-lang/rfcs/pull/2094\" rel=\"noopener \ + noreferrer\">RFC 2094 \"Non-lexical lifetimes\""; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!(i.title(), Some("Episode #9 - A Once in a Lifetime RFC")); @@ -343,9 +344,9 @@ mod tests { let second = channel.items().iter().nth(8).unwrap(); let i2 = new_episode(&second, 0).unwrap(); - let descr2 = "This week we look at RFC 2071 \"Add \ - impl Trait type alias and variable declarations\""; + let descr2 = "This week we look at RFC 2071 \"Add impl Trait type alias and \ + variable declarations\""; assert_eq!(i2.title(), Some("Episode #8 - An Existential Crisis")); assert_eq!( i2.uri(), diff --git a/hammond-gtk/src/widgets/episode.rs b/hammond-gtk/src/widgets/episode.rs index 4d51d85..4d876df 100644 --- a/hammond-gtk/src/widgets/episode.rs +++ b/hammond-gtk/src/widgets/episode.rs @@ -1,23 +1,25 @@ +use glib; +use gtk; +use gtk::prelude::*; +use gtk::{ContainerExt, TextBufferExt}; + use open; +use dissolve::strip_html_tags; +use diesel::associations::Identifiable; + use hammond_data::dbqueries; use hammond_data::{Episode, Podcast}; use hammond_downloader::downloader; use hammond_data::utils::*; use hammond_data::errors::*; -use dissolve::strip_html_tags; -use diesel::associations::Identifiable; +// use utils::html_to_markup; use std::thread; use std::cell::RefCell; use std::sync::mpsc::{channel, Receiver}; use std::path::Path; -use glib; -use gtk; -use gtk::prelude::*; -use gtk::{ContainerExt, TextBufferExt}; - type Foo = RefCell)>>; thread_local!(static GLOBAL: Foo = RefCell::new(None)); @@ -32,6 +34,7 @@ struct EpisodeWidget { unplayed: gtk::Button, title: gtk::Label, description: gtk::TextView, + // description: gtk::Label, expander: gtk::Expander, } @@ -50,6 +53,7 @@ impl EpisodeWidget { let title: gtk::Label = builder.get_object("title_label").unwrap(); let expander: gtk::Expander = builder.get_object("expand_desc").unwrap(); let description: gtk::TextView = builder.get_object("desc_text_view").unwrap(); + // let description: gtk::Label = builder.get_object("desc_text").unwrap(); EpisodeWidget { container, @@ -78,12 +82,15 @@ impl EpisodeWidget { } if episode.description().is_some() { - let d = episode.description().unwrap().to_owned(); - + let text = episode.description().unwrap().to_owned(); let description = &self.description; self.expander - .connect_activate(clone!(description => move |_| { - let plain_text = strip_html_tags(&d).join(" "); + .connect_activate(clone!(description, text => move |_| { + // let mut text = text.clone(); + // html_to_markup(&mut text); + // description.set_markup(&text) + + let plain_text = strip_html_tags(&text).join(""); // TODO: handle unwrap let buff = description.get_buffer().unwrap(); buff.set_text(plain_text.trim()); diff --git a/scripts/release.sh b/scripts/release.sh index 9715f8b..cced5a7 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -12,14 +12,12 @@ mkdir -p $DIST cp -rf hammond-data $DIST cp -rf hammond-gtk $DIST cp -rf hammond-downloader $DIST -cp build.rs $DIST cp Cargo.toml $DIST cp configure $DIST cp meson.build $DIST cp Hammond.doap $DIST cp LICENSE $DIST cp README.md $DIST -# cp -rf assets/org.gnome.Hammond.desktop $DIST cp -rf assets $DIST cp -rf scripts $DIST