use ammonia; use rss::{Channel, Item}; use rfc822_sanitizer::parse_from_rfc2822_with_fallback; use models::insertables::{NewEpisode, NewEpisodeBuilder, NewPodcast, NewPodcastBuilder}; use utils::url_cleaner; use errors::*; // TODO: Extend the support for parsing itunes extensions /// Parses a `rss::Channel` into a `NewPodcast` Struct. pub(crate) fn new_podcast(chan: &Channel, source_id: i32) -> NewPodcast { let title = chan.title().trim(); let description = ammonia::clean(chan.description().trim()); let link = url_cleaner(chan.link()); let x = chan.itunes_ext().map(|s| s.image()); let image_uri = if let Some(img) = x { img.map(|s| url_cleaner(s)) } else { chan.image().map(|foo| url_cleaner(foo.url())) }; NewPodcastBuilder::default() .title(title) .description(description) .link(link) .image_uri(image_uri) .source_id(source_id) .build() .unwrap() } /// Parses an `rss::Item` into a `NewEpisode` Struct. pub(crate) fn new_episode(item: &Item, parent_id: i32) -> Result { let title = item.title().map(|s| s.trim().to_owned()); let description = item.description().map(|s| ammonia::clean(s.trim())); let guid = item.guid().map(|s| s.value().trim().to_owned()); // Its kinda weird this being an Option type. // Rss 2.0 specified that it's optional. // Though the db scema has a requirment of episode uri being Unique && Not Null. // TODO: Restructure let x = item.enclosure().map(|s| url_cleaner(s.url())); let uri = if x.is_some() { x.unwrap() } else if item.link().is_some() { item.link().map(|s| url_cleaner(s)).unwrap() } else { bail!("No url specified for the item.") }; let date = parse_from_rfc2822_with_fallback( // Default to rfc2822 represantation of epoch 0. item.pub_date().unwrap_or("Thu, 1 Jan 1970 00:00:00 +0000"), ); // Should treat information from the rss feeds as invalid by default. // Case: Thu, 05 Aug 2016 06:00:00 -0400 <-- Actually that was friday. let pub_date = date.map(|x| x.to_rfc2822()).ok(); let epoch = date.map(|x| x.timestamp() as i32).unwrap_or(0); let length = item.enclosure().map(|x| x.length().parse().unwrap_or(0)); Ok( NewEpisodeBuilder::default() .title(title) .uri(uri) .description(description) .length(length) .published_date(pub_date) .epoch(epoch) .guid(guid) .podcast_id(parent_id) .build() .unwrap() ) } #[cfg(test)] mod tests { use std::fs::File; use std::io::BufReader; use rss::Channel; use super::*; #[test] fn test_new_podcast_intercepted() { let file = File::open("tests/feeds/Intercepted.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let descr = "The people behind The Intercept’s fearless reporting and incisive \ commentary—Jeremy Scahill, Glenn Greenwald, Betsy Reed and others—discuss \ the crucial issues of our time: national security, civil liberties, foreign \ policy, and criminal justice. Plus interviews with artists, thinkers, and \ newsmakers who challenge our preconceptions about the world we live in."; let pd = new_podcast(&channel, 0); assert_eq!(pd.title(), "Intercepted with Jeremy Scahill"); assert_eq!(pd.link(), "https://theintercept.com/podcasts"); assert_eq!(pd.description(), descr); assert_eq!( pd.image_uri(), Some( "http://static.megaphone.fm/podcasts/d5735a50-d904-11e6-8532-73c7de466ea6/image/\ uploads_2F1484252190700-qhn5krasklbce3dh-a797539282700ea0298a3a26f7e49b0b_\ 2FIntercepted_COVER%2B_281_29.png" ) ); } #[test] fn test_new_podcast_breakthrough() { let file = File::open("tests/feeds/TheBreakthrough.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let descr = "Latest Articles and Investigations from ProPublica, an independent, \ non-profit newsroom that produces investigative journalism in the public \ interest."; let pd = new_podcast(&channel, 0); assert_eq!(pd.title(), "The Breakthrough"); assert_eq!(pd.link(), "http://www.propublica.org/podcast"); assert_eq!(pd.description(), descr); assert_eq!( pd.image_uri(), Some("http://www.propublica.org/images/podcast_logo_2.png") ); } #[test] fn test_new_podcast_lup() { let file = File::open("tests/feeds/LinuxUnplugged.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let descr = "An open show powered by community LINUX Unplugged takes the best attributes \ of open collaboration and focuses them into a weekly lifestyle show about \ Linux."; let pd = new_podcast(&channel, 0); assert_eq!(pd.title(), "LINUX Unplugged Podcast"); assert_eq!(pd.link(), "http://www.jupiterbroadcasting.com/"); assert_eq!(pd.description(), descr); assert_eq!( pd.image_uri(), Some("http://www.jupiterbroadcasting.com/images/LASUN-Badge1400.jpg") ); } #[test] fn test_new_podcast_r4explanation() { let file = File::open("tests/feeds/R4Explanation.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let pd = new_podcast(&channel, 0); let descr = "A weekly discussion of Rust RFCs"; assert_eq!(pd.title(), "Request For Explanation"); assert_eq!( pd.link(), "https://request-for-explanation.github.io/podcast/" ); assert_eq!(pd.description(), descr); assert_eq!( pd.image_uri(), Some("https://request-for-explanation.github.io/podcast/podcast.png") ); } #[test] fn test_new_episode_intercepted() { let file = File::open("tests/feeds/Intercepted.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let firstitem = channel.items().first().unwrap(); let descr = "NSA whistleblower Edward Snowden discusses the massive Equifax data breach \ and allegations of Russian interference in the US election. Commentator \ Shaun King explains his call for a boycott of the NFL and talks about his \ campaign to bring violent neo-Nazis to justice. Rapper Open Mike Eagle \ performs."; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!(i.title(), Some("The Super Bowl of Racism")); assert_eq!(i.uri(), "http://traffic.megaphone.fm/PPY6458293736.mp3"); assert_eq!(i.description(), Some(descr)); assert_eq!(i.length(), Some(66738886)); assert_eq!(i.guid(), Some("7df4070a-9832-11e7-adac-cb37b05d5e24")); assert_eq!(i.published_date(), Some("Wed, 13 Sep 2017 10:00:00 +0000")); assert_eq!(i.epoch(), 1505296800); let second = channel.items().iter().nth(1).unwrap(); let i2 = new_episode(&second, 0).unwrap(); let descr2 = "This week on Intercepted: Jeremy gives an update on the aftermath of \ Blackwater’s 2007 massacre of Iraqi civilians. Intercept reporter Lee Fang \ lays out how a network of libertarian think tanks called the Atlas Network \ is insidiously shaping political infrastructure in Latin America. We speak \ with attorney and former Hugo Chavez adviser Eva Golinger about the \ Venezuela\'s political turmoil.And we hear Claudia Lizardo of the \ Caracas-based band, La Pequeña Revancha, talk about her music and hopes for \ Venezuela."; assert_eq!( i2.title(), Some("Atlas Golfed — U.S.-Backed Think Tanks Target Latin America") ); assert_eq!(i2.uri(), "http://traffic.megaphone.fm/FL5331443769.mp3"); assert_eq!(i2.description(), Some(descr2)); assert_eq!(i2.length(), Some(67527575)); assert_eq!(i2.guid(), Some("7c207a24-e33f-11e6-9438-eb45dcf36a1d")); assert_eq!(i2.published_date(), Some("Wed, 9 Aug 2017 10:00:00 +0000")); assert_eq!(i2.epoch(), 1502272800); } #[test] fn test_new_episode_breakthrough() { let file = File::open("tests/feeds/TheBreakthrough.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let firstitem = channel.items().first().unwrap(); let descr = "

A reporter finds that homes meant to replace New York’s troubled \ psychiatric hospitals might be just as bad.

"; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!( i.title(), Some("The Breakthrough: Hopelessness and Exploitation Inside Homes for Mentally Ill") ); assert_eq!( i.uri(), "http://tracking.feedpress.it/link/10581/6726758/20170908-cliff-levy.mp3" ); assert_eq!(i.description(), Some(descr)); assert_eq!(i.length(), Some(33396551)); assert_eq!( i.guid(), Some( "https://www.propublica.org/podcast/\ the-breakthrough-hopelessness-exploitation-homes-for-mentally-ill#134472" ) ); assert_eq!(i.published_date(), Some("Fri, 8 Sep 2017 12:00:00 +0000")); assert_eq!(i.epoch(), 1504872000); let second = channel.items().iter().nth(1).unwrap(); let i2 = new_episode(&second, 0).unwrap(); let descr2 = "

Jonathan Allen and Amie Parnes didn’t know their book would be called \ ‘Shattered,’ or that their extraordinary access would let them chronicle \ the mounting signs of a doomed campaign.

"; assert_eq!( i2.title(), Some("The Breakthrough: Behind the Scenes of Hillary Clinton’s Failed Bid for \ President") ); assert_eq!( i2.uri(), "http://tracking.feedpress.it/link/10581/6726759/16_JohnAllen-CRAFT.mp3".to_string() ); assert_eq!(i2.description(), Some(descr2)); assert_eq!(i2.length(), Some(17964071)); assert_eq!( i2.guid(), Some( "https://www.propublica.\ org/podcast/the-breakthrough-hillary-clinton-failed-presidential-bid#133721" ) ); assert_eq!(i2.published_date(), Some("Fri, 25 Aug 2017 12:00:00 +0000")); assert_eq!(i2.epoch(), 1503662400); } #[test] fn test_new_episode_lup() { let file = File::open("tests/feeds/LinuxUnplugged.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let firstitem = channel.items().first().unwrap(); let descr = "Audit your network with a couple of easy commands on Kali Linux. Chris \ decides to blow off a little steam by attacking his IoT devices, Wes has \ the scope on Equifax blaming open source & the Beard just saved the \ show. It’s a really packed episode!"; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!(i.title(), Some("Hacking Devices with Kali Linux | LUP 214")); assert_eq!( i.uri(), "http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0214.mp3" ); assert_eq!(i.description(), Some(descr)); assert_eq!(i.length(), Some(46479789)); assert_eq!(i.guid(), Some("78A682B4-73E8-47B8-88C0-1BE62DD4EF9D")); assert_eq!(i.published_date(), Some("Tue, 12 Sep 2017 22:24:42 -0700")); assert_eq!(i.epoch(), 1505280282); let second = channel.items().iter().nth(1).unwrap(); let i2 = new_episode(&second, 0).unwrap(); let descr2 = "

The Gnome project is about to solve one of our audience's biggest \ Wayland’s concerns. But as the project takes on a new level of relevance, \ decisions for the next version of Gnome have us worried about the \ future.

\n\n

Plus we chat with Wimpy about the Ubuntu Rally in NYC, \ Microsoft’s sneaky move to turn Windows 10 into the “ULTIMATE LINUX \ RUNTIME”, community news & more!

"; assert_eq!(i2.title(), Some("Gnome Does it Again | LUP 213")); assert_eq!( i2.uri(), "http://www.podtrac.com/pts/redirect.mp3/traffic.libsyn.com/jnite/lup-0213.mp3" ); assert_eq!(i2.description(), Some(descr2)); assert_eq!(i2.length(), Some(36544272)); assert_eq!(i2.guid(), Some("1CE57548-B36C-4F14-832A-5D5E0A24E35B")); assert_eq!(i2.published_date(), Some("Tue, 5 Sep 2017 20:57:27 -0700")); assert_eq!(i2.epoch(), 1504670247); } #[test] fn test_new_episode_r4expanation() { let file = File::open("tests/feeds/R4Explanation.xml").unwrap(); let channel = Channel::read_from(BufReader::new(file)).unwrap(); let firstitem = channel.items().iter().nth(9).unwrap(); let descr = "This week we look at RFC 2094 \"Non-lexical lifetimes\""; let i = new_episode(&firstitem, 0).unwrap(); assert_eq!(i.title(), Some("Episode #9 - A Once in a Lifetime RFC")); assert_eq!( i.uri(), "http://request-for-explanation.github.\ io/podcast/ep9-a-once-in-a-lifetime-rfc/episode.mp3" ); assert_eq!(i.description(), Some(descr)); assert_eq!(i.length(), Some(15077388)); assert_eq!( i.guid(), Some("https://request-for-explanation.github.io/podcast/ep9-a-once-in-a-lifetime-rfc/") ); assert_eq!(i.published_date(), Some("Mon, 28 Aug 2017 15:00:00 -0700")); assert_eq!(i.epoch(), 1503957600); let second = channel.items().iter().nth(8).unwrap(); let i2 = new_episode(&second, 0).unwrap(); let descr2 = "This week we look at RFC 2071 \"Add impl Trait type alias and \ variable declarations\""; assert_eq!(i2.title(), Some("Episode #8 - An Existential Crisis")); assert_eq!( i2.uri(), "http://request-for-explanation.github.io/podcast/ep8-an-existential-crisis/episode.\ mp3" ); assert_eq!(i2.description(), Some(descr2)); assert_eq!(i2.length(), Some(13713219)); assert_eq!( i2.guid(), Some("https://request-for-explanation.github.io/podcast/ep8-an-existential-crisis/") ); assert_eq!(i2.published_date(), Some("Tue, 15 Aug 2017 17:00:00 -0700")); assert_eq!(i2.epoch(), 1502841600); } }