hammond_data: Move parse_episode from parser into a NewEpisode method.
This commit is contained in:
parent
bd9844f012
commit
8174fe0bac
@ -85,7 +85,7 @@ impl Feed {
|
|||||||
let items = self.channel.items();
|
let items = self.channel.items();
|
||||||
let new_episodes: Vec<_> = items
|
let new_episodes: Vec<_> = items
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.filter_map(|item| parser::new_episode(item, pd.id()).ok())
|
.filter_map(|item| NewEpisode::new(item, pd.id()).ok())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
new_episodes
|
new_episodes
|
||||||
@ -97,7 +97,7 @@ impl Feed {
|
|||||||
let episodes = self.channel
|
let episodes = self.channel
|
||||||
.items()
|
.items()
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(|item| result(parser::new_episode(item, pd.id())))
|
.map(|item| result(NewEpisode::new(item, pd.id())))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Box::new(episodes)
|
Box::new(episodes)
|
||||||
|
|||||||
@ -210,6 +210,12 @@ impl Update for NewEpisode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl NewEpisode {
|
impl NewEpisode {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
/// Parses an `rss::Item` into a `NewEpisode` Struct.
|
||||||
|
pub(crate) fn new(item: &rss::Item, podcast_id: i32) -> Result<Self> {
|
||||||
|
NewEpisodeMinimal::new(item, podcast_id).map(|ep| ep.into_new_episode(item))
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Refactor into batch indexes instead.
|
// TODO: Refactor into batch indexes instead.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub(crate) fn into_episode(self, con: &SqliteConnection) -> Result<Episode> {
|
pub(crate) fn into_episode(self, con: &SqliteConnection) -> Result<Episode> {
|
||||||
@ -227,7 +233,7 @@ impl NewEpisode {
|
|||||||
match ep {
|
match ep {
|
||||||
Ok(foo) => {
|
Ok(foo) => {
|
||||||
if foo.podcast_id() != self.podcast_id {
|
if foo.podcast_id() != self.podcast_id {
|
||||||
error!("NEP pid: {}, EP pid: {}", self.podcast_id, foo.podcast_id());
|
error!("NEP pid: {}\nEP pid: {}", self.podcast_id, foo.podcast_id());
|
||||||
};
|
};
|
||||||
|
|
||||||
if foo.title() != self.title.as_str() || foo.epoch() != self.epoch
|
if foo.title() != self.title.as_str() || foo.epoch() != self.epoch
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
use ammonia;
|
use ammonia;
|
||||||
use rss::{Channel, Item};
|
use rss::{Channel, Item};
|
||||||
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
|
|
||||||
|
|
||||||
use models::insertables::{NewEpisode, NewEpisodeBuilder, NewPodcast, NewPodcastBuilder};
|
use models::insertables::{NewPodcast, NewPodcastBuilder};
|
||||||
use utils::url_cleaner;
|
use utils::url_cleaner;
|
||||||
use utils::replace_extra_spaces;
|
use utils::replace_extra_spaces;
|
||||||
|
|
||||||
use errors::*;
|
// use errors::*;
|
||||||
|
|
||||||
/// Parses a `rss::Channel` into a `NewPodcast` Struct.
|
/// Parses a `rss::Channel` into a `NewPodcast` Struct.
|
||||||
pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
|
pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
|
||||||
@ -38,57 +37,6 @@ pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast {
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses an `rss::Item` into a `NewEpisode` Struct.
|
|
||||||
pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result<NewEpisode> {
|
|
||||||
if item.title().is_none() {
|
|
||||||
bail!("No title specified for the item.")
|
|
||||||
}
|
|
||||||
|
|
||||||
let title = item.title().unwrap().trim().to_owned();
|
|
||||||
let guid = item.guid().map(|s| s.value().trim().to_owned());
|
|
||||||
|
|
||||||
// Prefer itunes summary over rss.description since many feeds put html into rss.description.
|
|
||||||
let summary = item.itunes_ext().map(|s| s.summary()).and_then(|s| s);
|
|
||||||
let description = if summary.is_some() {
|
|
||||||
summary.map(|s| replace_extra_spaces(&ammonia::clean(s)))
|
|
||||||
} else {
|
|
||||||
item.description()
|
|
||||||
.map(|s| replace_extra_spaces(&ammonia::clean(s)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let uri = if let Some(url) = item.enclosure().map(|s| url_cleaner(s.url())) {
|
|
||||||
Some(url)
|
|
||||||
} else if item.link().is_some() {
|
|
||||||
item.link().map(|s| url_cleaner(s))
|
|
||||||
} else {
|
|
||||||
bail!("No url specified for the item.")
|
|
||||||
};
|
|
||||||
|
|
||||||
let date = parse_from_rfc2822_with_fallback(
|
|
||||||
// Default to rfc2822 representation of epoch 0.
|
|
||||||
item.pub_date().unwrap_or("Thu, 1 Jan 1970 00:00:00 +0000"),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Should treat information from the rss feeds as invalid by default.
|
|
||||||
// Case: Thu, 05 Aug 2016 06:00:00 -0400 <-- Actually that was friday.
|
|
||||||
let epoch = date.map(|x| x.timestamp() as i32).unwrap_or(0);
|
|
||||||
|
|
||||||
let length = || -> Option<i32> { item.enclosure().map(|x| x.length().parse().ok())? }();
|
|
||||||
let duration = parse_itunes_duration(item);
|
|
||||||
|
|
||||||
Ok(NewEpisodeBuilder::default()
|
|
||||||
.title(title)
|
|
||||||
.uri(uri)
|
|
||||||
.description(description)
|
|
||||||
.length(length)
|
|
||||||
.duration(duration)
|
|
||||||
.epoch(epoch)
|
|
||||||
.guid(guid)
|
|
||||||
.podcast_id(parent_id)
|
|
||||||
.build()
|
|
||||||
.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses an Item Itunes extension and returns its duration value in seconds.
|
/// Parses an Item Itunes extension and returns its duration value in seconds.
|
||||||
// FIXME: Refactor
|
// FIXME: Refactor
|
||||||
// TODO: Write tests
|
// TODO: Write tests
|
||||||
@ -124,6 +72,7 @@ mod tests {
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
use rss;
|
use rss;
|
||||||
|
use models::insertables::{NewEpisode, NewEpisodeBuilder};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -291,7 +240,7 @@ mod tests {
|
|||||||
campaign to bring violent neo-Nazis to justice. Rapper Open Mike Eagle \
|
campaign to bring violent neo-Nazis to justice. Rapper Open Mike Eagle \
|
||||||
performs.";
|
performs.";
|
||||||
|
|
||||||
let ep = new_episode(&firstitem, 0).unwrap();
|
let ep = NewEpisode::new(&firstitem, 0).unwrap();
|
||||||
let expected = NewEpisodeBuilder::default()
|
let expected = NewEpisodeBuilder::default()
|
||||||
.title("The Super Bowl of Racism")
|
.title("The Super Bowl of Racism")
|
||||||
.uri(Some(String::from(
|
.uri(Some(String::from(
|
||||||
@ -308,7 +257,7 @@ mod tests {
|
|||||||
assert_eq!(ep, expected);
|
assert_eq!(ep, expected);
|
||||||
|
|
||||||
let second = channel.items().iter().nth(1).unwrap();
|
let second = channel.items().iter().nth(1).unwrap();
|
||||||
let ep = new_episode(&second, 0).unwrap();
|
let ep = NewEpisode::new(&second, 0).unwrap();
|
||||||
|
|
||||||
let descr = "This week on Intercepted: Jeremy gives an update on the aftermath of \
|
let descr = "This week on Intercepted: Jeremy gives an update on the aftermath of \
|
||||||
Blackwater’s 2007 massacre of Iraqi civilians. Intercept reporter Lee Fang \
|
Blackwater’s 2007 massacre of Iraqi civilians. Intercept reporter Lee Fang \
|
||||||
@ -344,7 +293,7 @@ mod tests {
|
|||||||
let descr =
|
let descr =
|
||||||
"A reporter finds that homes meant to replace New York’s troubled psychiatric \
|
"A reporter finds that homes meant to replace New York’s troubled psychiatric \
|
||||||
hospitals might be just as bad.";
|
hospitals might be just as bad.";
|
||||||
let ep = new_episode(&firstitem, 0).unwrap();
|
let ep = NewEpisode::new(&firstitem, 0).unwrap();
|
||||||
|
|
||||||
let expected = NewEpisodeBuilder::default()
|
let expected = NewEpisodeBuilder::default()
|
||||||
.title("The Breakthrough: Hopelessness and Exploitation Inside Homes for Mentally Ill")
|
.title("The Breakthrough: Hopelessness and Exploitation Inside Homes for Mentally Ill")
|
||||||
@ -361,7 +310,7 @@ mod tests {
|
|||||||
assert_eq!(ep, expected);
|
assert_eq!(ep, expected);
|
||||||
|
|
||||||
let second = channel.items().iter().nth(1).unwrap();
|
let second = channel.items().iter().nth(1).unwrap();
|
||||||
let ep = new_episode(&second, 0).unwrap();
|
let ep = NewEpisode::new(&second, 0).unwrap();
|
||||||
let descr =
|
let descr =
|
||||||
"Jonathan Allen and Amie Parnes didn’t know their book would be called \
|
"Jonathan Allen and Amie Parnes didn’t know their book would be called \
|
||||||
‘Shattered,’ or that their extraordinary access would let them chronicle the \
|
‘Shattered,’ or that their extraordinary access would let them chronicle the \
|
||||||
@ -400,7 +349,7 @@ mod tests {
|
|||||||
decides to blow off a little steam by attacking his IoT devices, Wes has the \
|
decides to blow off a little steam by attacking his IoT devices, Wes has the \
|
||||||
scope on Equifax blaming open source & the Beard just saved the show. \
|
scope on Equifax blaming open source & the Beard just saved the show. \
|
||||||
It’s a really packed episode!";
|
It’s a really packed episode!";
|
||||||
let ep = new_episode(&firstitem, 0).unwrap();
|
let ep = NewEpisode::new(&firstitem, 0).unwrap();
|
||||||
|
|
||||||
let expected = NewEpisodeBuilder::default()
|
let expected = NewEpisodeBuilder::default()
|
||||||
.title("Hacking Devices with Kali Linux | LUP 214")
|
.title("Hacking Devices with Kali Linux | LUP 214")
|
||||||
@ -418,7 +367,7 @@ mod tests {
|
|||||||
assert_eq!(ep, expected);
|
assert_eq!(ep, expected);
|
||||||
|
|
||||||
let second = channel.items().iter().nth(1).unwrap();
|
let second = channel.items().iter().nth(1).unwrap();
|
||||||
let ep = new_episode(&second, 0).unwrap();
|
let ep = NewEpisode::new(&second, 0).unwrap();
|
||||||
|
|
||||||
let descr =
|
let descr =
|
||||||
"The Gnome project is about to solve one of our audience's biggest Wayland’s \
|
"The Gnome project is about to solve one of our audience's biggest Wayland’s \
|
||||||
@ -451,7 +400,7 @@ mod tests {
|
|||||||
let firstitem = channel.items().iter().nth(9).unwrap();
|
let firstitem = channel.items().iter().nth(9).unwrap();
|
||||||
let descr = "This week we look at <a href=\"https://github.com/rust-lang/rfcs/pull/2094\" \
|
let descr = "This week we look at <a href=\"https://github.com/rust-lang/rfcs/pull/2094\" \
|
||||||
rel=\"noopener noreferrer\">RFC 2094</a> \"Non-lexical lifetimes\"";
|
rel=\"noopener noreferrer\">RFC 2094</a> \"Non-lexical lifetimes\"";
|
||||||
let ep = new_episode(&firstitem, 0).unwrap();
|
let ep = NewEpisode::new(&firstitem, 0).unwrap();
|
||||||
|
|
||||||
let expected = NewEpisodeBuilder::default()
|
let expected = NewEpisodeBuilder::default()
|
||||||
.title("Episode #9 - A Once in a Lifetime RFC")
|
.title("Episode #9 - A Once in a Lifetime RFC")
|
||||||
@ -472,7 +421,7 @@ mod tests {
|
|||||||
assert_eq!(ep, expected);
|
assert_eq!(ep, expected);
|
||||||
|
|
||||||
let second = channel.items().iter().nth(8).unwrap();
|
let second = channel.items().iter().nth(8).unwrap();
|
||||||
let ep = new_episode(&second, 0).unwrap();
|
let ep = NewEpisode::new(&second, 0).unwrap();
|
||||||
|
|
||||||
let descr = "This week we look at <a href=\"https://github.com/rust-lang/rfcs/pull/2071\" \
|
let descr = "This week we look at <a href=\"https://github.com/rust-lang/rfcs/pull/2071\" \
|
||||||
rel=\"noopener noreferrer\">RFC 2071</a> \"Add impl Trait type alias and \
|
rel=\"noopener noreferrer\">RFC 2071</a> \"Add impl Trait type alias and \
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user