Parser: Add NewEpisodeMinimal struct.

Parsing whole episodes can be expensive, and only a subset of the
fields is needed to decide whether an episode should be indexed,
updated, or ignored. A sketch of the intended use follows the commit
metadata below.
Jordan Petridis 2018-01-16 18:33:32 +02:00
parent 978e5a61f6
commit f64779f70a
2 changed files with 108 additions and 4 deletions
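
To make the intent concrete, here is a rough Rust sketch of how the cheap minimal parse could drive that decision. This is an illustration, not code from this commit: the decide_episode_state name and the Episode::epoch()/duration() getters used in the comparison are assumed, and the function is imagined to sit in the same module as NewEpisodeMinimal so its private fields are visible.

#[allow(dead_code)]
fn decide_episode_state(
    item: &rss::Item,
    parent_id: i32,
    existing: Option<&Episode>,
) -> Result<IndexState<NewEpisodeMinimal>> {
    // Cheap: only title/uri/duration/epoch/podcast_id are parsed here.
    let minimal = NewEpisodeMinimal::new(item, parent_id)?;
    match existing {
        // Never seen before -> needs a full parse and an insert.
        None => Ok(IndexState::Index(minimal)),
        // The cheap fields are unchanged -> skip the expensive work entirely.
        Some(ep) if ep.epoch() == minimal.epoch && ep.duration() == minimal.duration => {
            Ok(IndexState::NotChanged)
        }
        // Something changed -> full parse and an update.
        Some(_) => Ok(IndexState::Update(minimal)),
    }
}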

@@ -1,15 +1,27 @@
#![allow(unused_mut)]
use diesel::prelude::*;
use diesel;
use rss;
use ammonia;
use rfc822_sanitizer::parse_from_rfc2822_with_fallback as parse_rfc822;
use utils::{replace_extra_spaces, url_cleaner};
use schema::{episode, podcast, source};
use models::queryables::{Episode, Podcast, Source};
use dbqueries;
use database::connection;
use parser;
use errors::*;
#[allow(dead_code)]
enum IndexState<T> {
Index(T),
Update(T),
NotChanged,
}
trait Insert {
fn insert(&self, &SqliteConnection) -> QueryResult<usize>;
@@ -166,6 +178,19 @@ pub(crate) struct NewEpisode {
podcast_id: i32,
}
impl From<NewEpisodeMinimal> for NewEpisode {
fn from(e: NewEpisodeMinimal) -> Self {
NewEpisodeBuilder::default()
.title(e.title)
.uri(e.uri)
.duration(e.duration)
.epoch(e.epoch)
.podcast_id(e.podcast_id)
.build()
.unwrap()
}
}
impl Insert for NewEpisode {
fn insert(&self, con: &SqliteConnection) -> QueryResult<usize> {
use schema::episode::dsl::*;
@@ -251,3 +276,82 @@ impl NewEpisode {
self.podcast_id
}
}
#[derive(Insertable, AsChangeset)]
#[table_name = "episode"]
#[derive(Debug, Clone, Default, Builder, PartialEq)]
#[builder(derive(Debug))]
#[builder(setter(into))]
pub(crate) struct NewEpisodeMinimal {
title: String,
uri: Option<String>,
duration: Option<i32>,
epoch: i32,
podcast_id: i32,
}
impl NewEpisodeMinimal {
#[allow(dead_code)]
fn new(item: &rss::Item, parent_id: i32) -> Result<Self> {
if item.title().is_none() {
bail!("No title specified for the item.")
}
let title = item.title().unwrap().trim().to_owned();
let uri = if let Some(url) = item.enclosure().map(|s| url_cleaner(s.url())) {
Some(url)
} else if item.link().is_some() {
item.link().map(|s| url_cleaner(s))
} else {
bail!("No url specified for the item.")
};
let date = parse_rfc822(
// Default to the rfc2822 representation of epoch 0.
item.pub_date().unwrap_or("Thu, 1 Jan 1970 00:00:00 +0000"),
);
// Treat date information from RSS feeds as potentially invalid by default.
// Case: "Thu, 05 Aug 2016 06:00:00 -0400" <-- that date was actually a Friday.
let epoch = date.map(|x| x.timestamp() as i32).unwrap_or(0);
let duration = parser::parse_itunes_duration(item);
Ok(NewEpisodeMinimalBuilder::default()
.title(title)
.uri(uri)
.duration(duration)
.epoch(epoch)
.podcast_id(parent_id)
.build()
.unwrap())
}
#[allow(dead_code)]
fn into_new_episode(self, item: &rss::Item) -> NewEpisode {
let guid = item.guid().map(|s| s.value().trim().to_owned());
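// The closure's ? flattens the Option<Option<i32>> from parsing the enclosure length into an Option<i32>.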
let length = || -> Option<i32> { item.enclosure().map(|x| x.length().parse().ok())? }();
// Prefer itunes summary over rss.description since many feeds put html into
// rss.description.
let summary = item.itunes_ext().map(|s| s.summary()).and_then(|s| s);
let description = if summary.is_some() {
summary.map(|s| replace_extra_spaces(&ammonia::clean(s)))
} else {
item.description()
.map(|s| replace_extra_spaces(&ammonia::clean(s)))
};
NewEpisodeBuilder::default()
.title(self.title)
.uri(self.uri)
.duration(self.duration)
.epoch(self.epoch)
.podcast_id(self.podcast_id)
.guid(guid)
.length(length)
.description(description)
.build()
.unwrap()
}
}
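
On the consuming side, a minimal sketch of how an IndexState result can defer the expensive work: into_new_episode() (guid, enclosure length, ammonia-sanitised description) runs only for items that will actually be written. The handle_item name is hypothetical and nothing below is part of this commit.

#[allow(dead_code)]
fn handle_item(item: &rss::Item, state: IndexState<NewEpisodeMinimal>) -> Option<NewEpisode> {
    match state {
        // Full parse (guid, enclosure length, sanitised description) only
        // when a row will actually be written.
        IndexState::Index(minimal) | IndexState::Update(minimal) => {
            Some(minimal.into_new_episode(item))
        }
        // Nothing changed, so the description/ammonia handling never runs.
        IndexState::NotChanged => None,
    }
}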

@@ -93,7 +93,7 @@ pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result<NewEpisode> {
// FIXME: Refactor
// TODO: Write tests
#[allow(non_snake_case)]
-fn parse_itunes_duration(item: &Item) -> Option<i32> {
+pub(crate) fn parse_itunes_duration(item: &Item) -> Option<i32> {
let duration = item.itunes_ext().map(|s| s.duration())??;
// FOR SOME FUCKING REASON, IN THE APPLE EXTENSION SPEC