NewEpisode: Parse rss.description instead of itunes.summary.

We can deal with (sort of) HTML now, so we should start indexing
the proper rss description. Also clean up commented-out code.
This commit is contained in:
Jordan Petridis 2018-04-02 23:49:03 +03:00
parent a946ddfab1
commit a463753c84
4 changed files with 15 additions and 75 deletions

View File

@ -35,7 +35,6 @@ extern crate lazy_static;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
extern crate ammonia;
extern crate chrono; extern crate chrono;
extern crate futures; extern crate futures;
extern crate futures_cpupool; extern crate futures_cpupool;

View File

@ -1,4 +1,3 @@
use ammonia;
use diesel; use diesel;
use diesel::prelude::*; use diesel::prelude::*;
use rfc822_sanitizer::parse_from_rfc2822_with_fallback as parse_rfc822; use rfc822_sanitizer::parse_from_rfc2822_with_fallback as parse_rfc822;
@ -10,7 +9,7 @@ use errors::DataError;
use models::{Episode, EpisodeMinimal, Index, Insert, Update}; use models::{Episode, EpisodeMinimal, Index, Insert, Update};
use parser; use parser;
use schema::episode; use schema::episode;
use utils::{replace_extra_spaces, url_cleaner}; use utils::url_cleaner;
#[derive(Insertable, AsChangeset)] #[derive(Insertable, AsChangeset)]
#[table_name = "episode"] #[table_name = "episode"]
@ -231,15 +230,7 @@ impl NewEpisodeMinimal {
pub(crate) fn into_new_episode(self, item: &rss::Item) -> NewEpisode { pub(crate) fn into_new_episode(self, item: &rss::Item) -> NewEpisode {
let length = || -> Option<i32> { item.enclosure().map(|x| x.length().parse().ok())? }(); let length = || -> Option<i32> { item.enclosure().map(|x| x.length().parse().ok())? }();
// Prefer itunes summary over rss.description since many feeds put html into let description = item.description().map(|s| s.to_owned());
// rss.description.
let summary = item.itunes_ext().map(|s| s.summary()).and_then(|s| s);
let description = if summary.is_some() {
summary.map(|s| replace_extra_spaces(&ammonia::clean(s)))
} else {
item.description()
.map(|s| replace_extra_spaces(&ammonia::clean(s)))
};
NewEpisodeBuilder::default() NewEpisodeBuilder::default()
.title(self.title) .title(self.title)
@ -413,7 +404,7 @@ mod tests {
static ref EXPECTED_LUP_1: NewEpisode = { static ref EXPECTED_LUP_1: NewEpisode = {
let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \ let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \
decides to blow off a little steam by attacking his IoT devices, Wes has \ decides to blow off a little steam by attacking his IoT devices, Wes has \
the scope on Equifax blaming open source &amp; the Beard just saved the \ the scope on Equifax blaming open source & the Beard just saved the \
show. Its a really packed episode!"; show. Its a really packed episode!";
NewEpisodeBuilder::default() NewEpisodeBuilder::default()
@ -431,12 +422,12 @@ mod tests {
.unwrap() .unwrap()
}; };
static ref EXPECTED_LUP_2: NewEpisode = { static ref EXPECTED_LUP_2: NewEpisode = {
let descr = "The Gnome project is about to solve one of our audience's biggest \ let descr =
Waylands concerns. But as the project takes on a new level of \ "<p>The Gnome project is about to solve one of our audience's biggest Waylands \
relevance, decisions for the next version of Gnome have us worried about \ concerns. But as the project takes on a new level of relevance, decisions for \
the future.\nPlus we chat with Wimpy about the Ubuntu Rally in NYC, \ the next version of Gnome have us worried about the future.</p>\n\n<p>Plus we \
Microsofts sneaky move to turn Windows 10 into the ULTIMATE LINUX \ chat with Wimpy about the Ubuntu Rally in NYC, Microsofts sneaky move to turn \
RUNTIME, community news &amp; more!"; Windows 10 into the ULTIMATE LINUX RUNTIME, community news & more!</p>";
NewEpisodeBuilder::default() NewEpisodeBuilder::default()
.title("Gnome Does it Again | LUP 213") .title("Gnome Does it Again | LUP 213")

View File

@ -11,7 +11,6 @@ use schema::podcast;
use database::connection; use database::connection;
use dbqueries; use dbqueries;
// use utils::{replace_extra_spaces, url_cleaner};
use utils::url_cleaner; use utils::url_cleaner;
#[derive(Insertable, AsChangeset)] #[derive(Insertable, AsChangeset)]
@ -92,15 +91,6 @@ impl NewPodcast {
let title = chan.title().trim(); let title = chan.title().trim();
let description = chan.description().trim(); let description = chan.description().trim();
// Prefer itunes summary over rss.description since many feeds put html into
// rss.description.
// let summary = chan.itunes_ext().map(|s| s.summary()).and_then(|s| s);
// let description = if let Some(sum) = summary {
// replace_extra_spaces(&ammonia::clean(sum))
// } else {
// replace_extra_spaces(&ammonia::clean(chan.description()))
// };
let link = url_cleaner(chan.link()); let link = url_cleaner(chan.link());
let itunes_img = chan.itunes_ext() let itunes_img = chan.itunes_ext()
.and_then(|s| s.image()) .and_then(|s| s.image())
@ -171,7 +161,7 @@ mod tests {
let descr = "The people behind The Intercepts fearless reporting and incisive \ let descr = "The people behind The Intercepts fearless reporting and incisive \
commentaryJeremy Scahill, Glenn Greenwald, Betsy Reed and \ commentaryJeremy Scahill, Glenn Greenwald, Betsy Reed and \
othersdiscuss the crucial issues of our time: national security, civil \ othersdiscuss the crucial issues of our time: national security, civil \
liberties, foreign policy, and criminal justice. Plus interviews with \ liberties, foreign policy, and criminal justice. Plus interviews with \
artists, thinkers, and newsmakers who challenge our preconceptions about \ artists, thinkers, and newsmakers who challenge our preconceptions about \
the world we live in."; the world we live in.";
@ -205,15 +195,15 @@ mod tests {
.unwrap() .unwrap()
}; };
static ref EXPECTED_TIPOFF: NewPodcast = { static ref EXPECTED_TIPOFF: NewPodcast = {
let desc = "Welcome to The Tip Off- the podcast where we take you behind the scenes \ let desc = "<p>Welcome to The Tip Off- the podcast where we take you behind the \
of some of the best investigative journalism from recent years. Each \ scenes of some of the best investigative journalism from recent years. \
episode well be digging into an investigative scoop- hearing from the \ Each episode well be digging into an investigative scoop- hearing from \
journalists behind the work as they tell us about the leads, the \ the journalists behind the work as they tell us about the leads, the \
dead-ends and of course, the tip offs. Therell be car chases, slammed \ dead-ends and of course, the tip offs. Therell be car chases, slammed \
doors, terrorist cells, meetings in dimly lit bars and cafes, wrangling \ doors, terrorist cells, meetings in dimly lit bars and cafes, wrangling \
with despotic regimes and much more. So if youre curious about the fun, \ with despotic regimes and much more. So if youre curious about the fun, \
complicated detective work that goes into doing great investigative \ complicated detective work that goes into doing great investigative \
journalism- then this is the podcast for you."; journalism- then this is the podcast for you.</p>";
NewPodcastBuilder::default() NewPodcastBuilder::default()
.title("The Tip Off") .title("The Tip Off")

View File

@ -327,44 +327,4 @@ mod tests {
let id = 000000000; let id = 000000000;
assert!(lookup_id(id).is_err()); assert!(lookup_id(id).is_err());
} }
/// Exercises `html_to_pango_markup` with plain text, tag-like input, a stray
/// ampersand entity, and text containing URLs, comparing against the exact
/// expected markup strings.
#[test]
fn test_markup() {
    let convert = html_to_pango_markup;

    // Inputs without URLs, paired with the output they must produce:
    // plain text is unchanged, `<`/`>` and a bare `&` come back escaped.
    let plain_cases = [
        ("this is parsed", "this is parsed"),
        (
            "this is <span>parsed</span>",
            "this is &lt;span&gt;parsed&lt;/span&gt;",
        ),
        ("this is &ssdf;", "this is &amp;ssdf;"),
    ];
    for &(input, expected) in plain_cases.iter() {
        let rendered = convert(input);
        assert_eq!(&rendered, expected);
    }

    // A URL with a query string and fragment is expected to be wrapped in an
    // anchor, with every `&` escaped in both the href and the link text.
    let url = "http://url.com/test?param1&param2=test&param3#hashing";
    let escaped_url = url.replace('&', "&amp;");
    let rendered = convert(&format!("this is &ssdf; {}", url));
    let expected = format!(
        "this is &amp;ssdf; <a href=\"{0}\">{0}</a>",
        escaped_url
    );
    assert_eq!(&rendered, &expected);

    // Sentences with an embedded link: (input text, URL expected to be linked).
    let link_cases = [
        ("with links: http://gnome.org :D", "http://gnome.org"),
        (
            "with links: http://url.com/test.html&stuff :D",
            "http://url.com/test.html&stuff",
        ),
    ];
    for &(text, link) in link_cases.iter() {
        let rendered = convert(text);
        let expected = format!(
            "with links: <a href=\"{0}\">{0}</a> :D",
            link.replace('&', "&amp;")
        );
        assert_eq!(&rendered, &expected);
    }
}
} }