Sanitize HTML during feed parsing.

This commit is contained in:
Jordan Petridis 2017-12-07 06:38:31 +02:00
parent 05e056481f
commit c07d240532
No known key found for this signature in database
GPG Key ID: CEABAD9F5683B9A6
6 changed files with 56 additions and 26 deletions

22
Cargo.lock generated
View File

@ -20,6 +20,19 @@ dependencies = [
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ammonia"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"html5ever 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.10.2"
@ -548,6 +561,7 @@ dependencies = [
name = "hammond-data"
version = "0.1.0"
dependencies = [
"ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"derive_builder 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diesel 0.99.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -599,6 +613,7 @@ dependencies = [
"loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"open 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -752,6 +767,11 @@ name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "maplit"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "markup5ever"
version = "0.6.2"
@ -1583,6 +1603,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6cbd0b9af8587c72beadc9f72d35b9fbb070982c9e6203e46e93f10df25f8f45"
"checksum advapi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e06588080cb19d0acb6739808aafa5f26bfb2ca015b2b6370028b44cf7cb8a9a"
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
"checksum ammonia 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2cc0ea12b4977283c563e78eaf227b024d89d72a6394040fad4063899bfcfb48"
"checksum ansi_term 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
"checksum antidote 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "34fde25430d87a9388dadbe6e34d7f72a462c8b43ac8d309b42b0a8505d7e2a5"
"checksum atk-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "33a67fd81e1922dddc335887516f2f5254534e89c9d39fa89bca5d79bd150d34"
@ -1658,6 +1679,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b"
"checksum loggerv 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b178879253fab6ddb4ea931e1e6f514d45ce6a53f7fe618a0a8751f43e42e4f1"
"checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
"checksum maplit 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5ed95049d40b8a1a7691adbabca028ad481f7e6a2921ce4846e1ee168b4e4ca5"
"checksum markup5ever 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2cf89d3e0486c32c9d99521455ddf9a438910a1ce2bd376936086edc15dff5fc"
"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376"
"checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"

View File

@ -5,7 +5,9 @@ version = "0.1.0"
workspace = "../"
[dependencies]
ammonia = "1.0.0"
chrono = "0.4.0"
derive_builder = "0.5.0"
dotenv = "0.10.1"
error-chain = "0.11.0"
lazy_static = "1.0.0"
@ -18,7 +20,6 @@ rfc822_sanitizer = "0.3.3"
rss = "1.2.1"
url = "1.6.0"
xdg = "2.1.0"
derive_builder = "0.5.0"
[dependencies.diesel]
features = ["sqlite"]

View File

@ -46,6 +46,7 @@ extern crate rfc822_sanitizer;
extern crate rss;
extern crate url;
extern crate xdg;
extern crate ammonia;
#[allow(missing_docs)]
pub mod dbqueries;

View File

@ -1,3 +1,4 @@
use ammonia;
use rss::{Channel, Item};
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
@ -10,7 +11,7 @@ use errors::*;
/// Parses a `rss::Channel` into a `NewPodcast` Struct.
pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
let title = chan.title().trim();
let description = chan.description().trim();
let description = ammonia::clean(chan.description().trim());
let link = url_cleaner(chan.link());
let x = chan.itunes_ext().map(|s| s.image());
@ -33,7 +34,7 @@ pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
/// Parses an `rss::Item` into a `NewEpisode` Struct.
pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result<NewEpisode> {
let title = item.title().map(|s| s.trim().to_owned());
let description = item.description().map(|s| s.trim().to_owned());
let description = item.description().map(|s| ammonia::clean(s.trim()));
let guid = item.guid().map(|s| s.value().trim().to_owned());
// It's kinda weird this being an Option type.
@ -276,10 +277,10 @@ mod tests {
let channel = Channel::read_from(BufReader::new(file)).unwrap();
let firstitem = channel.items().first().unwrap();
let descr =
"Audit your network with a couple of easy commands on Kali Linux. Chris decides to \
blow off a little steam by attacking his IoT devices, Wes has the scope on Equifax \
blaming open source & the Beard just saved the show. Its a really packed episode!";
let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \
decides to blow off a little steam by attacking his IoT devices, Wes has \
the scope on Equifax blaming open source &amp; the Beard just saved the \
show. Its a really packed episode!";
let i = new_episode(&firstitem, 0).unwrap();
assert_eq!(i.title(), Some("Hacking Devices with Kali Linux | LUP 214"));
@ -301,7 +302,7 @@ mod tests {
decisions for the next version of Gnome have us worried about the \
future.</p>\n\n<p>Plus we chat with Wimpy about the Ubuntu Rally in NYC, \
Microsofts sneaky move to turn Windows 10 into the ULTIMATE LINUX \
RUNTIME, community news & more!</p>";
RUNTIME, community news &amp; more!</p>";
assert_eq!(i2.title(), Some("Gnome Does it Again | LUP 213"));
assert_eq!(
i2.uri(),
@ -321,8 +322,8 @@ mod tests {
let firstitem = channel.items().iter().nth(9).unwrap();
let descr = "This week we look at <a \
href=\"https://github.com/rust-lang/rfcs/pull/2094\">RFC 2094</a> \
\"Non-lexical lifetimes\"";
href=\"https://github.com/rust-lang/rfcs/pull/2094\" rel=\"noopener \
noreferrer\">RFC 2094</a> \"Non-lexical lifetimes\"";
let i = new_episode(&firstitem, 0).unwrap();
assert_eq!(i.title(), Some("Episode #9 - A Once in a Lifetime RFC"));
@ -343,9 +344,9 @@ mod tests {
let second = channel.items().iter().nth(8).unwrap();
let i2 = new_episode(&second, 0).unwrap();
let descr2 = "This week we look at <a \
href=\"https://github.com/rust-lang/rfcs/pull/2071\">RFC 2071</a> \"Add \
impl Trait type alias and variable declarations\"";
let descr2 = "This week we look at <a href=\"https://github.com/rust-lang/rfcs/pull/2071\" \
rel=\"noopener noreferrer\">RFC 2071</a> \"Add impl Trait type alias and \
variable declarations\"";
assert_eq!(i2.title(), Some("Episode #8 - An Existential Crisis"));
assert_eq!(
i2.uri(),

View File

@ -1,23 +1,25 @@
use glib;
use gtk;
use gtk::prelude::*;
use gtk::{ContainerExt, TextBufferExt};
use open;
use dissolve::strip_html_tags;
use diesel::associations::Identifiable;
use hammond_data::dbqueries;
use hammond_data::{Episode, Podcast};
use hammond_downloader::downloader;
use hammond_data::utils::*;
use hammond_data::errors::*;
use dissolve::strip_html_tags;
use diesel::associations::Identifiable;
// use utils::html_to_markup;
use std::thread;
use std::cell::RefCell;
use std::sync::mpsc::{channel, Receiver};
use std::path::Path;
use glib;
use gtk;
use gtk::prelude::*;
use gtk::{ContainerExt, TextBufferExt};
type Foo = RefCell<Option<(gtk::Button, gtk::Button, gtk::Button, Receiver<bool>)>>;
thread_local!(static GLOBAL: Foo = RefCell::new(None));
@ -32,6 +34,7 @@ struct EpisodeWidget {
unplayed: gtk::Button,
title: gtk::Label,
description: gtk::TextView,
// description: gtk::Label,
expander: gtk::Expander,
}
@ -50,6 +53,7 @@ impl EpisodeWidget {
let title: gtk::Label = builder.get_object("title_label").unwrap();
let expander: gtk::Expander = builder.get_object("expand_desc").unwrap();
let description: gtk::TextView = builder.get_object("desc_text_view").unwrap();
// let description: gtk::Label = builder.get_object("desc_text").unwrap();
EpisodeWidget {
container,
@ -78,12 +82,15 @@ impl EpisodeWidget {
}
if episode.description().is_some() {
let d = episode.description().unwrap().to_owned();
let text = episode.description().unwrap().to_owned();
let description = &self.description;
self.expander
.connect_activate(clone!(description => move |_| {
let plain_text = strip_html_tags(&d).join(" ");
.connect_activate(clone!(description, text => move |_| {
// let mut text = text.clone();
// html_to_markup(&mut text);
// description.set_markup(&text)
let plain_text = strip_html_tags(&text).join("");
// TODO: handle unwrap
let buff = description.get_buffer().unwrap();
buff.set_text(plain_text.trim());

View File

@ -12,14 +12,12 @@ mkdir -p $DIST
cp -rf hammond-data $DIST
cp -rf hammond-gtk $DIST
cp -rf hammond-downloader $DIST
cp build.rs $DIST
cp Cargo.toml $DIST
cp configure $DIST
cp meson.build $DIST
cp Hammond.doap $DIST
cp LICENSE $DIST
cp README.md $DIST
# cp -rf assets/org.gnome.Hammond.desktop $DIST
cp -rf assets $DIST
cp -rf scripts $DIST