NewEpisode: Use rss.description instead of itunes.summary.

We can deal with (sort of) HTML now, so we should start indexing
the proper rss description. Also clean up commented-out code.
This commit is contained in:
Jordan Petridis 2018-04-02 23:49:03 +03:00
parent a946ddfab1
commit a463753c84
4 changed files with 15 additions and 75 deletions

View File

@ -35,7 +35,6 @@ extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate ammonia;
extern crate chrono;
extern crate futures;
extern crate futures_cpupool;

View File

@ -1,4 +1,3 @@
use ammonia;
use diesel;
use diesel::prelude::*;
use rfc822_sanitizer::parse_from_rfc2822_with_fallback as parse_rfc822;
@ -10,7 +9,7 @@ use errors::DataError;
use models::{Episode, EpisodeMinimal, Index, Insert, Update};
use parser;
use schema::episode;
use utils::{replace_extra_spaces, url_cleaner};
use utils::url_cleaner;
#[derive(Insertable, AsChangeset)]
#[table_name = "episode"]
@ -231,15 +230,7 @@ impl NewEpisodeMinimal {
pub(crate) fn into_new_episode(self, item: &rss::Item) -> NewEpisode {
let length = || -> Option<i32> { item.enclosure().map(|x| x.length().parse().ok())? }();
// Prefer itunes summary over rss.description since many feeds put html into
// rss.description.
let summary = item.itunes_ext().map(|s| s.summary()).and_then(|s| s);
let description = if summary.is_some() {
summary.map(|s| replace_extra_spaces(&ammonia::clean(s)))
} else {
item.description()
.map(|s| replace_extra_spaces(&ammonia::clean(s)))
};
let description = item.description().map(|s| s.to_owned());
NewEpisodeBuilder::default()
.title(self.title)
@ -413,7 +404,7 @@ mod tests {
static ref EXPECTED_LUP_1: NewEpisode = {
let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \
decides to blow off a little steam by attacking his IoT devices, Wes has \
the scope on Equifax blaming open source &amp; the Beard just saved the \
the scope on Equifax blaming open source & the Beard just saved the \
show. Its a really packed episode!";
NewEpisodeBuilder::default()
@ -431,12 +422,12 @@ mod tests {
.unwrap()
};
static ref EXPECTED_LUP_2: NewEpisode = {
let descr = "The Gnome project is about to solve one of our audience's biggest \
Waylands concerns. But as the project takes on a new level of \
relevance, decisions for the next version of Gnome have us worried about \
the future.\nPlus we chat with Wimpy about the Ubuntu Rally in NYC, \
Microsofts sneaky move to turn Windows 10 into the ULTIMATE LINUX \
RUNTIME, community news &amp; more!";
let descr =
"<p>The Gnome project is about to solve one of our audience's biggest Waylands \
concerns. But as the project takes on a new level of relevance, decisions for \
the next version of Gnome have us worried about the future.</p>\n\n<p>Plus we \
chat with Wimpy about the Ubuntu Rally in NYC, Microsofts sneaky move to turn \
Windows 10 into the ULTIMATE LINUX RUNTIME, community news & more!</p>";
NewEpisodeBuilder::default()
.title("Gnome Does it Again | LUP 213")

View File

@ -11,7 +11,6 @@ use schema::podcast;
use database::connection;
use dbqueries;
// use utils::{replace_extra_spaces, url_cleaner};
use utils::url_cleaner;
#[derive(Insertable, AsChangeset)]
@ -92,15 +91,6 @@ impl NewPodcast {
let title = chan.title().trim();
let description = chan.description().trim();
// Prefer itunes summary over rss.description since many feeds put html into
// rss.description.
// let summary = chan.itunes_ext().map(|s| s.summary()).and_then(|s| s);
// let description = if let Some(sum) = summary {
// replace_extra_spaces(&ammonia::clean(sum))
// } else {
// replace_extra_spaces(&ammonia::clean(chan.description()))
// };
let link = url_cleaner(chan.link());
let itunes_img = chan.itunes_ext()
.and_then(|s| s.image())
@ -171,7 +161,7 @@ mod tests {
let descr = "The people behind The Intercepts fearless reporting and incisive \
commentaryJeremy Scahill, Glenn Greenwald, Betsy Reed and \
othersdiscuss the crucial issues of our time: national security, civil \
liberties, foreign policy, and criminal justice. Plus interviews with \
liberties, foreign policy, and criminal justice. Plus interviews with \
artists, thinkers, and newsmakers who challenge our preconceptions about \
the world we live in.";
@ -205,15 +195,15 @@ mod tests {
.unwrap()
};
static ref EXPECTED_TIPOFF: NewPodcast = {
let desc = "Welcome to The Tip Off- the podcast where we take you behind the scenes \
of some of the best investigative journalism from recent years. Each \
episode well be digging into an investigative scoop- hearing from the \
journalists behind the work as they tell us about the leads, the \
let desc = "<p>Welcome to The Tip Off- the podcast where we take you behind the \
scenes of some of the best investigative journalism from recent years. \
Each episode well be digging into an investigative scoop- hearing from \
the journalists behind the work as they tell us about the leads, the \
dead-ends and of course, the tip offs. Therell be car chases, slammed \
doors, terrorist cells, meetings in dimly lit bars and cafes, wrangling \
with despotic regimes and much more. So if youre curious about the fun, \
complicated detective work that goes into doing great investigative \
journalism- then this is the podcast for you.";
journalism- then this is the podcast for you.</p>";
NewPodcastBuilder::default()
.title("The Tip Off")

View File

@ -327,44 +327,4 @@ mod tests {
let id = 000000000;
assert!(lookup_id(id).is_err());
}
// Exercises `html_to_pango_markup` (defined outside this hunk) against plain
// text, an inline tag, an unrecognised entity and bare URLs.
#[test]
fn test_markup() {
// Short alias so each case below stays on one line.
let markup = html_to_pango_markup;
// Plain text is expected to come back unchanged.
let m = markup("this is parsed");
assert_eq!(&m, "this is parsed");
// Angle brackets of the `<span>` tag are expected to be escaped, not kept as markup.
let m = markup("this is <span>parsed</span>");
assert_eq!(&m, "this is &lt;span&gt;parsed&lt;/span&gt;");
// The ampersand of `&ssdf;` is expected to be escaped to `&amp;`.
let m = markup("this is &ssdf;");
assert_eq!(&m, "this is &amp;ssdf;");
// A URL with a query string and fragment is expected to be wrapped in an
// `<a href>` element, with every `&` inside it escaped in both the href and the label.
let url = "http://url.com/test?param1&param2=test&param3#hashing";
let m = markup(&format!("this is &ssdf; {}", url));
assert_eq!(
&m,
&format!(
"this is &amp;ssdf; <a href=\"{0}\">{0}</a>",
url.replace('&', "&amp;")
)
);
// Each pair is (input text, URL expected to be linkified); the text around
// the URL is expected to stay as it was.
for l in &[
("with links: http://gnome.org :D", "http://gnome.org"),
(
"with links: http://url.com/test.html&stuff :D",
"http://url.com/test.html&stuff",
),
] {
let m = markup(l.0);
assert_eq!(
&m,
&format!(
"with links: <a href=\"{0}\">{0}</a> :D",
l.1.replace('&', "&amp;")
)
);
}
}
}