From 7be0b5bbf8a24a40a7c7e14f5e973c596d2669b1 Mon Sep 17 00:00:00 2001 From: Jordan Petridis Date: Mon, 20 Nov 2017 22:06:29 +0200 Subject: [PATCH] Added a url cleanr. Closes #4. --- Cargo.lock | 1 + hammond-data/Cargo.toml | 1 + hammond-data/src/dbqueries.rs | 16 +++---- hammond-data/src/feed.rs | 1 - hammond-data/src/lib.rs | 1 + hammond-data/src/models/insertables.rs | 4 +- hammond-data/src/parser.rs | 63 +++++++++++++++++--------- hammond-data/src/utils.rs | 11 +++++ hammond-gtk/src/headerbar.rs | 2 + 9 files changed, 65 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b198188..4be2696 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -557,6 +557,7 @@ dependencies = [ "rfc822_sanitizer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "rss 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", "xdg 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/hammond-data/Cargo.toml b/hammond-data/Cargo.toml index 211db5f..166abcf 100644 --- a/hammond-data/Cargo.toml +++ b/hammond-data/Cargo.toml @@ -17,6 +17,7 @@ reqwest = "0.8.1" rfc822_sanitizer = "0.3.3" rss = "1.1.0" xdg = "2.1.0" +url = "1.6.0" [dependencies.diesel] features = ["sqlite"] diff --git a/hammond-data/src/dbqueries.rs b/hammond-data/src/dbqueries.rs index e37e3b6..c231e07 100644 --- a/hammond-data/src/dbqueries.rs +++ b/hammond-data/src/dbqueries.rs @@ -1,7 +1,7 @@ use diesel::prelude::*; use diesel; -use models::{Episode, Podcast, Source, NewSource, NewEpisode, NewPodcast}; +use models::{Episode, NewEpisode, NewPodcast, NewSource, Podcast, Source}; use chrono::prelude::*; /// Random db querries helper functions. @@ -226,10 +226,8 @@ pub fn replace_podcast(pd: &NewPodcast) -> QueryResult { let db = connection(); let tempdb = db.lock().unwrap(); - - diesel::replace_into(podcast) - .values(pd) - .execute(&*tempdb) + + diesel::replace_into(podcast).values(pd).execute(&*tempdb) } pub fn replace_episode(ep: &NewEpisode) -> QueryResult { @@ -237,8 +235,6 @@ pub fn replace_episode(ep: &NewEpisode) -> QueryResult { let db = connection(); let tempdb = db.lock().unwrap(); - - diesel::replace_into(episode) - .values(ep) - .execute(&*tempdb) -} \ No newline at end of file + + diesel::replace_into(episode).values(ep).execute(&*tempdb) +} diff --git a/hammond-data/src/feed.rs b/hammond-data/src/feed.rs index 1ac5b5d..00f480a 100644 --- a/hammond-data/src/feed.rs +++ b/hammond-data/src/feed.rs @@ -1,6 +1,5 @@ use rayon::prelude::*; use diesel::Identifiable; - use rss; use dbqueries; diff --git a/hammond-data/src/lib.rs b/hammond-data/src/lib.rs index 48100f7..c170b77 100644 --- a/hammond-data/src/lib.rs +++ b/hammond-data/src/lib.rs @@ -22,6 +22,7 @@ extern crate rayon; extern crate reqwest; extern crate rfc822_sanitizer; extern crate rss; +extern crate url; extern crate xdg; pub mod dbqueries; diff --git a/hammond-data/src/models/insertables.rs b/hammond-data/src/models/insertables.rs index 0699ca2..ccd7748 100644 --- a/hammond-data/src/models/insertables.rs +++ b/hammond-data/src/models/insertables.rs @@ -43,7 +43,7 @@ impl<'a> NewSource<'a> { #[derive(Debug, Clone)] pub struct NewEpisode<'a> { pub title: Option<&'a str>, - pub uri: Option<&'a str>, + pub uri: Option, pub description: Option<&'a str>, pub published_date: Option, pub length: Option, @@ -57,7 +57,7 @@ impl<'a> NewEpisode<'a> { // Watch out for v0.99.0 beta and change the toml. // TODO: Refactor into batch indexes instead. pub fn index(&self) -> QueryResult<()> { - let ep = dbqueries::get_episode_from_uri(self.uri.unwrap()); + let ep = dbqueries::get_episode_from_uri(&self.uri.clone().unwrap()); match ep { Ok(foo) => if foo.title() != self.title diff --git a/hammond-data/src/parser.rs b/hammond-data/src/parser.rs index 69e76b7..de58a31 100644 --- a/hammond-data/src/parser.rs +++ b/hammond-data/src/parser.rs @@ -1,25 +1,24 @@ use rss::{Channel, Item}; use rfc822_sanitizer::parse_from_rfc2822_with_fallback; -use models; +use models::{NewEpisode, NewPodcast}; +use utils::url_cleaner; // TODO: Extend the support for parsing itunes extensions /// Parses a `rss::Channel` into a `NewPodcast` Struct. -pub fn new_podcast(chan: &Channel, source_id: i32) -> models::NewPodcast { +pub fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast { let title = chan.title().trim().to_owned(); - let link = chan.link().trim().to_owned(); let description = chan.description().trim().to_owned(); - // Some feeds miss baseurl and/or http:// - // TODO: Sanitize the url, - // could also be reuse to sanitize the new-url gui entrybox. + + let link = url_cleaner(chan.link()).to_owned(); let x = chan.itunes_ext().map(|s| s.image()); let image_uri = if let Some(img) = x { - img.map(|s| s.to_string()) + img.map(|s| url_cleaner(s)) } else { - chan.image().map(|foo| foo.url().to_owned()) + chan.image().map(|foo| url_cleaner(foo.url())) }; - models::NewPodcast { + NewPodcast { title, link, description, @@ -29,20 +28,20 @@ pub fn new_podcast(chan: &Channel, source_id: i32) -> models::NewPodcast { } /// Parses an `rss::Item` into a `NewEpisode` Struct. -pub fn new_episode(item: &Item, parent_id: i32) -> models::NewEpisode { +pub fn new_episode(item: &Item, parent_id: i32) -> NewEpisode { let title = item.title().map(|s| s.trim()); let description = item.description().map(|s| s.trim()); - let guid = item.guid().map(|x| x.value().trim()); + let guid = item.guid().map(|s| s.value().trim()); // Its kinda weird this being an Option type. // Rss 2.0 specified that it's optional. // Though the db scema has a requirment of episode uri being Unique && Not Null. // TODO: Restructure - let x = item.enclosure().map(|x| x.url().trim()); + let x = item.enclosure().map(|s| url_cleaner(s.url())); let uri = if x.is_some() { x } else if item.link().is_some() { - item.link() + item.link().map(|s| url_cleaner(s)) } else { None }; @@ -59,7 +58,7 @@ pub fn new_episode(item: &Item, parent_id: i32) -> models::NewEpisode { let length = item.enclosure().map(|x| x.length().parse().unwrap_or(0)); - models::NewEpisode { + NewEpisode { title, uri, description, @@ -178,7 +177,10 @@ mod tests { let i = new_episode(&firstitem, 0); assert_eq!(i.title, Some("The Super Bowl of Racism")); - assert_eq!(i.uri, Some("http://traffic.megaphone.fm/PPY6458293736.mp3")); + assert_eq!( + i.uri, + Some("http://traffic.megaphone.fm/PPY6458293736.mp3".to_string()) + ); assert_eq!(i.description, Some(descr)); assert_eq!(i.length, Some(66738886)); assert_eq!(i.guid, Some("7df4070a-9832-11e7-adac-cb37b05d5e24")); @@ -203,7 +205,10 @@ mod tests { i2.title, Some("Atlas Golfed — U.S.-Backed Think Tanks Target Latin America",) ); - assert_eq!(i2.uri, Some("http://traffic.megaphone.fm/FL5331443769.mp3")); + assert_eq!( + i2.uri, + Some("http://traffic.megaphone.fm/FL5331443769.mp3".to_string()) + ); assert_eq!(i2.description, Some(descr2)); assert_eq!(i2.length, Some(67527575)); assert_eq!(i2.guid, Some("7c207a24-e33f-11e6-9438-eb45dcf36a1d")); @@ -230,7 +235,10 @@ mod tests { ); assert_eq!( i.uri, - Some("http://tracking.feedpress.it/link/10581/6726758/20170908-cliff-levy.mp3",) + Some( + "http://tracking.feedpress.it/link/10581/6726758/20170908-cliff-levy.mp3" + .to_string(), + ) ); assert_eq!(i.description, Some(descr)); assert_eq!(i.length, Some(33396551)); @@ -261,7 +269,10 @@ mod tests { ); assert_eq!( i2.uri, - Some("http://tracking.feedpress.it/link/10581/6726759/16_JohnAllen-CRAFT.mp3",) + Some( + "http://tracking.feedpress.it/link/10581/6726759/16_JohnAllen-CRAFT.mp3" + .to_string(), + ) ); assert_eq!(i2.description, Some(descr2)); assert_eq!(i2.length, Some(17964071)); @@ -294,7 +305,10 @@ mod tests { assert_eq!(i.title, Some("Hacking Devices with Kali Linux | LUP 214")); assert_eq!( i.uri, - Some("http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0214.mp3",) + Some( + "http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0214.mp3" + .to_string(), + ) ); assert_eq!(i.description, Some(descr)); assert_eq!(i.length, Some(46479789)); @@ -317,7 +331,10 @@ mod tests { assert_eq!(i2.title, Some("Gnome Does it Again | LUP 213")); assert_eq!( i2.uri, - Some("http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0213.mp3",) + Some( + "http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0213.mp3" + .to_string(), + ) ); assert_eq!(i2.description, Some(descr2)); assert_eq!(i2.length, Some(36544272)); @@ -345,7 +362,8 @@ mod tests { i.uri, Some( "http://request-for-explanation.github.\ - io/podcast/ep9-a-once-in-a-lifetime-rfc/episode.mp3", + io/podcast/ep9-a-once-in-a-lifetime-rfc/episode.mp3" + .to_string(), ) ); assert_eq!(i.description, Some(descr)); @@ -371,7 +389,8 @@ mod tests { i2.uri, Some( "http://request-for-explanation.github.\ - io/podcast/ep8-an-existential-crisis/episode.mp3", + io/podcast/ep8-an-existential-crisis/episode.mp3" + .to_string(), ) ); assert_eq!(i2.description, Some(descr2)); diff --git a/hammond-data/src/utils.rs b/hammond-data/src/utils.rs index 4cd75de..4d80076 100644 --- a/hammond-data/src/utils.rs +++ b/hammond-data/src/utils.rs @@ -2,6 +2,7 @@ use rayon::prelude::*; use chrono::prelude::*; use diesel::sqlite::SqliteConnection; +use url::{Position, Url}; use errors::*; use dbqueries; @@ -97,3 +98,13 @@ pub fn checkup() -> Result<()> { played_cleaner()?; Ok(()) } + +pub fn url_cleaner(s: &str) -> String { + // Copied from the cookbook. + // https://rust-lang-nursery.github.io/rust-cookbook/net.html + // #remove-fragment-identifiers-and-query-pairs-from-a-url + match Url::parse(s) { + Ok(parsed) => parsed[..Position::AfterPath].to_owned(), + _ => s.trim().to_owned(), + } +} diff --git a/hammond-gtk/src/headerbar.rs b/hammond-gtk/src/headerbar.rs index 438d814..aeffea5 100644 --- a/hammond-gtk/src/headerbar.rs +++ b/hammond-gtk/src/headerbar.rs @@ -2,6 +2,7 @@ use gtk; use gtk::prelude::*; use hammond_data::models::NewSource; +use hammond_data::utils::url_cleaner; use podcasts_view::update_podcasts_view; use utils; @@ -24,6 +25,7 @@ pub fn get_headerbar(stack: >k::Stack) -> gtk::HeaderBar { add_button.connect_clicked(clone!(stack, add_popover => move |_| { let url = new_url.get_text().unwrap_or_default(); + let url = url_cleaner(&url); on_add_bttn_clicked(&stack, &url); // TODO: lock the button instead of hiding and add notification of feed added.