Added a url cleaner. Closes #4.

This commit is contained in:
Jordan Petridis 2017-11-20 22:06:29 +02:00
parent 5890fe2bb4
commit 7be0b5bbf8
No known key found for this signature in database
GPG Key ID: CEABAD9F5683B9A6
9 changed files with 65 additions and 35 deletions

1
Cargo.lock generated
View File

@ -557,6 +557,7 @@ dependencies = [
"rfc822_sanitizer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rss 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"xdg 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]

View File

@ -17,6 +17,7 @@ reqwest = "0.8.1"
rfc822_sanitizer = "0.3.3"
rss = "1.1.0"
xdg = "2.1.0"
url = "1.6.0"
[dependencies.diesel]
features = ["sqlite"]

View File

@ -1,7 +1,7 @@
use diesel::prelude::*;
use diesel;
use models::{Episode, Podcast, Source, NewSource, NewEpisode, NewPodcast};
use models::{Episode, NewEpisode, NewPodcast, NewSource, Podcast, Source};
use chrono::prelude::*;
/// Random db queries helper functions.
@ -226,10 +226,8 @@ pub fn replace_podcast(pd: &NewPodcast) -> QueryResult<usize> {
let db = connection();
let tempdb = db.lock().unwrap();
diesel::replace_into(podcast)
.values(pd)
.execute(&*tempdb)
diesel::replace_into(podcast).values(pd).execute(&*tempdb)
}
pub fn replace_episode(ep: &NewEpisode) -> QueryResult<usize> {
@ -237,8 +235,6 @@ pub fn replace_episode(ep: &NewEpisode) -> QueryResult<usize> {
let db = connection();
let tempdb = db.lock().unwrap();
diesel::replace_into(episode)
.values(ep)
.execute(&*tempdb)
}
diesel::replace_into(episode).values(ep).execute(&*tempdb)
}

View File

@ -1,6 +1,5 @@
use rayon::prelude::*;
use diesel::Identifiable;
use rss;
use dbqueries;

View File

@ -22,6 +22,7 @@ extern crate rayon;
extern crate reqwest;
extern crate rfc822_sanitizer;
extern crate rss;
extern crate url;
extern crate xdg;
pub mod dbqueries;

View File

@ -43,7 +43,7 @@ impl<'a> NewSource<'a> {
#[derive(Debug, Clone)]
pub struct NewEpisode<'a> {
pub title: Option<&'a str>,
pub uri: Option<&'a str>,
pub uri: Option<String>,
pub description: Option<&'a str>,
pub published_date: Option<String>,
pub length: Option<i32>,
@ -57,7 +57,7 @@ impl<'a> NewEpisode<'a> {
// Watch out for v0.99.0 beta and change the toml.
// TODO: Refactor into batch indexes instead.
pub fn index(&self) -> QueryResult<()> {
let ep = dbqueries::get_episode_from_uri(self.uri.unwrap());
let ep = dbqueries::get_episode_from_uri(&self.uri.clone().unwrap());
match ep {
Ok(foo) => if foo.title() != self.title

View File

@ -1,25 +1,24 @@
use rss::{Channel, Item};
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
use models;
use models::{NewEpisode, NewPodcast};
use utils::url_cleaner;
// TODO: Extend the support for parsing itunes extensions
/// Parses a `rss::Channel` into a `NewPodcast` Struct.
pub fn new_podcast(chan: &Channel, source_id: i32) -> models::NewPodcast {
pub fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
let title = chan.title().trim().to_owned();
let link = chan.link().trim().to_owned();
let description = chan.description().trim().to_owned();
// Some feeds miss baseurl and/or http://
// TODO: Sanitize the url,
// could also be reused to sanitize the new-url gui entrybox.
let link = url_cleaner(chan.link()).to_owned();
let x = chan.itunes_ext().map(|s| s.image());
let image_uri = if let Some(img) = x {
img.map(|s| s.to_string())
img.map(|s| url_cleaner(s))
} else {
chan.image().map(|foo| foo.url().to_owned())
chan.image().map(|foo| url_cleaner(foo.url()))
};
models::NewPodcast {
NewPodcast {
title,
link,
description,
@ -29,20 +28,20 @@ pub fn new_podcast(chan: &Channel, source_id: i32) -> models::NewPodcast {
}
/// Parses an `rss::Item` into a `NewEpisode` Struct.
pub fn new_episode(item: &Item, parent_id: i32) -> models::NewEpisode {
pub fn new_episode(item: &Item, parent_id: i32) -> NewEpisode {
let title = item.title().map(|s| s.trim());
let description = item.description().map(|s| s.trim());
let guid = item.guid().map(|x| x.value().trim());
let guid = item.guid().map(|s| s.value().trim());
// It's kinda weird this being an Option type.
// Rss 2.0 specified that it's optional.
// Though the db schema has a requirement of episode uri being Unique && Not Null.
// TODO: Restructure
let x = item.enclosure().map(|x| x.url().trim());
let x = item.enclosure().map(|s| url_cleaner(s.url()));
let uri = if x.is_some() {
x
} else if item.link().is_some() {
item.link()
item.link().map(|s| url_cleaner(s))
} else {
None
};
@ -59,7 +58,7 @@ pub fn new_episode(item: &Item, parent_id: i32) -> models::NewEpisode {
let length = item.enclosure().map(|x| x.length().parse().unwrap_or(0));
models::NewEpisode {
NewEpisode {
title,
uri,
description,
@ -178,7 +177,10 @@ mod tests {
let i = new_episode(&firstitem, 0);
assert_eq!(i.title, Some("The Super Bowl of Racism"));
assert_eq!(i.uri, Some("http://traffic.megaphone.fm/PPY6458293736.mp3"));
assert_eq!(
i.uri,
Some("http://traffic.megaphone.fm/PPY6458293736.mp3".to_string())
);
assert_eq!(i.description, Some(descr));
assert_eq!(i.length, Some(66738886));
assert_eq!(i.guid, Some("7df4070a-9832-11e7-adac-cb37b05d5e24"));
@ -203,7 +205,10 @@ mod tests {
i2.title,
Some("Atlas Golfed — U.S.-Backed Think Tanks Target Latin America",)
);
assert_eq!(i2.uri, Some("http://traffic.megaphone.fm/FL5331443769.mp3"));
assert_eq!(
i2.uri,
Some("http://traffic.megaphone.fm/FL5331443769.mp3".to_string())
);
assert_eq!(i2.description, Some(descr2));
assert_eq!(i2.length, Some(67527575));
assert_eq!(i2.guid, Some("7c207a24-e33f-11e6-9438-eb45dcf36a1d"));
@ -230,7 +235,10 @@ mod tests {
);
assert_eq!(
i.uri,
Some("http://tracking.feedpress.it/link/10581/6726758/20170908-cliff-levy.mp3",)
Some(
"http://tracking.feedpress.it/link/10581/6726758/20170908-cliff-levy.mp3"
.to_string(),
)
);
assert_eq!(i.description, Some(descr));
assert_eq!(i.length, Some(33396551));
@ -261,7 +269,10 @@ mod tests {
);
assert_eq!(
i2.uri,
Some("http://tracking.feedpress.it/link/10581/6726759/16_JohnAllen-CRAFT.mp3",)
Some(
"http://tracking.feedpress.it/link/10581/6726759/16_JohnAllen-CRAFT.mp3"
.to_string(),
)
);
assert_eq!(i2.description, Some(descr2));
assert_eq!(i2.length, Some(17964071));
@ -294,7 +305,10 @@ mod tests {
assert_eq!(i.title, Some("Hacking Devices with Kali Linux | LUP 214"));
assert_eq!(
i.uri,
Some("http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0214.mp3",)
Some(
"http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0214.mp3"
.to_string(),
)
);
assert_eq!(i.description, Some(descr));
assert_eq!(i.length, Some(46479789));
@ -317,7 +331,10 @@ mod tests {
assert_eq!(i2.title, Some("Gnome Does it Again | LUP 213"));
assert_eq!(
i2.uri,
Some("http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0213.mp3",)
Some(
"http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0213.mp3"
.to_string(),
)
);
assert_eq!(i2.description, Some(descr2));
assert_eq!(i2.length, Some(36544272));
@ -345,7 +362,8 @@ mod tests {
i.uri,
Some(
"http://request-for-explanation.github.\
io/podcast/ep9-a-once-in-a-lifetime-rfc/episode.mp3",
io/podcast/ep9-a-once-in-a-lifetime-rfc/episode.mp3"
.to_string(),
)
);
assert_eq!(i.description, Some(descr));
@ -371,7 +389,8 @@ mod tests {
i2.uri,
Some(
"http://request-for-explanation.github.\
io/podcast/ep8-an-existential-crisis/episode.mp3",
io/podcast/ep8-an-existential-crisis/episode.mp3"
.to_string(),
)
);
assert_eq!(i2.description, Some(descr2));

View File

@ -2,6 +2,7 @@ use rayon::prelude::*;
use chrono::prelude::*;
use diesel::sqlite::SqliteConnection;
use url::{Position, Url};
use errors::*;
use dbqueries;
@ -97,3 +98,13 @@ pub fn checkup() -> Result<()> {
played_cleaner()?;
Ok(())
}
/// Returns `s` cut off right after its path component.
///
/// When `s` parses as a URL, everything past the path (the query pairs and
/// the fragment identifier) is dropped and the rest is returned as an owned
/// `String`. When parsing fails, the input is handed back with only its
/// surrounding whitespace trimmed.
pub fn url_cleaner(s: &str) -> String {
    // Technique taken from the Rust cookbook:
    // https://rust-lang-nursery.github.io/rust-cookbook/net.html
    // #remove-fragment-identifiers-and-query-pairs-from-a-url
    Url::parse(s)
        .map(|url| url[..Position::AfterPath].to_owned())
        // Not a parseable URL: fall back to the trimmed raw input.
        .unwrap_or_else(|_| s.trim().to_owned())
}

View File

@ -2,6 +2,7 @@ use gtk;
use gtk::prelude::*;
use hammond_data::models::NewSource;
use hammond_data::utils::url_cleaner;
use podcasts_view::update_podcasts_view;
use utils;
@ -24,6 +25,7 @@ pub fn get_headerbar(stack: &gtk::Stack) -> gtk::HeaderBar {
add_button.connect_clicked(clone!(stack, add_popover => move |_| {
let url = new_url.get_text().unwrap_or_default();
let url = url_cleaner(&url);
on_add_bttn_clicked(&stack, &url);
// TODO: lock the button instead of hiding and add notification of feed added.