diff --git a/Cargo.lock b/Cargo.lock index efb816d..4b8cd7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -685,6 +685,7 @@ dependencies = [ "hyper-tls 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "native-tls 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -699,6 +700,7 @@ dependencies = [ "tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "xdg 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2141,6 +2143,14 @@ name = "xdg" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "xml-rs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] "checksum adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6cbd0b9af8587c72beadc9f72d35b9fbb070982c9e6203e46e93f10df25f8f45" "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4" @@ -2377,3 +2387,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" "checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" "checksum xdg 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a66b7c2281ebde13cf4391d70d4c7e5946c3c25e72a7b859ca8f677dcd0b0c61" +"checksum xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c1cb601d29fe2c2ac60a2b2e5e293994d87a1f6fa9687a31a15270f909be9c2" diff --git a/hammond-data/Cargo.toml b/hammond-data/Cargo.toml index 665351c..b8779df 100644 --- a/hammond-data/Cargo.toml +++ b/hammond-data/Cargo.toml @@ -16,6 +16,7 @@ rfc822_sanitizer = "0.3.3" rss = "1.5.0" url = "1.7.0" xdg = "2.1.0" +xml-rs = "0.7.0" futures = "0.1.21" hyper = "0.11.25" tokio-core = "0.1.17" @@ -43,6 +44,7 @@ rand = "0.4.2" tempdir = "0.3.7" criterion = "0.2.3" pretty_assertions = "0.5.1" +maplit = "1.0.1" [[bench]] name = "bench" diff --git a/hammond-data/src/errors.rs b/hammond-data/src/errors.rs index 6a0dd21..c2ea15f 100644 --- a/hammond-data/src/errors.rs +++ b/hammond-data/src/errors.rs @@ -5,6 +5,7 @@ use hyper; use native_tls; use rss; use url; +use xml; use std::io; @@ -49,6 +50,8 @@ pub enum DataError { IOError(#[cause] io::Error), #[fail(display = "RSS Error: {}", _0)] RssError(#[cause] rss::Error), + #[fail(display = "XML Reader Error: {}", _0)] + XmlReaderError(#[cause] xml::reader::Error), #[fail(display = "Error: {}", _0)] Bail(String), #[fail(display = "{}", _0)] @@ -115,6 +118,12 @@ impl From for DataError { } } +impl From for DataError { + fn from(err: xml::reader::Error) -> Self { + DataError::XmlReaderError(err) + } +} + impl From for DataError { fn from(err: String) -> Self { DataError::Bail(err) diff --git a/hammond-data/src/lib.rs b/hammond-data/src/lib.rs index 3c087b0..13847d5 100644 --- a/hammond-data/src/lib.rs +++ b/hammond-data/src/lib.rs @@ -29,6 +29,10 @@ #[macro_use] extern crate pretty_assertions; +#[cfg(test)] +#[macro_use] +extern crate maplit; + #[macro_use] extern crate derive_builder; #[macro_use] @@ -58,6 +62,8 @@ extern crate rss; extern crate 
tokio_core; extern crate url; extern crate xdg; +#[allow(unused)] +extern crate xml; pub mod database; #[allow(missing_docs)] @@ -66,6 +72,7 @@ pub mod dbqueries; pub mod errors; mod feed; pub(crate) mod models; +pub mod opml; mod parser; pub mod pipeline; mod schema; diff --git a/hammond-data/src/opml.rs b/hammond-data/src/opml.rs new file mode 100644 index 0000000..9da756a --- /dev/null +++ b/hammond-data/src/opml.rs @@ -0,0 +1,144 @@ +//! FIXME: Docs + +// #![allow(unused)] + +use errors::DataError; +use models::Source; +use xml::reader; + +use std::collections::HashSet; +use std::io::Read; + +// use std::fs::{File, OpenOptions}; +// use std::io::BufReader; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +// FIXME: Make it a Diesel model +/// Represents an `outline` xml element as per the `OPML` [specification][spec] +/// not `RSS` related sub-elements are omitted. +/// +/// [spec]: http://dev.opml.org/spec2.html +pub struct Opml { + title: String, + description: String, + url: String, +} + +/// Import feed URLs from a `R` into the `Source` table. +pub fn opml_import(reader: R) -> Result>, DataError> { + let feeds = extract_sources(reader)?; + Ok(feeds + .iter() + .map(|opml| Source::from_url(&opml.url)) + .collect()) +} + +/// Extracts the `outline` elements from a reader `R` and returns a `HashSet` of `Opml` structs. +pub fn extract_sources(reader: R) -> Result, reader::Error> { + let mut list = HashSet::new(); + let parser = reader::EventReader::new(reader); + + parser + .into_iter() + .map(|e| match e { + Ok(reader::XmlEvent::StartElement { + name, attributes, .. 
+ }) => { + if name.local_name == "outline" { + let mut title = String::new(); + let mut url = String::new(); + let mut description = String::new(); + + attributes.into_iter().for_each(|attribute| { + match attribute.name.local_name.as_str() { + "title" => title = attribute.value, + "xmlUrl" => url = attribute.value, + "description" => description = attribute.value, + _ => {} + } + }); + + let feed = Opml { + title, + description, + url, + }; + list.insert(feed); + } + Ok(()) + } + Err(err) => Err(err), + _ => Ok(()), + }) + .collect::, reader::Error>>()?; + + Ok(list) +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Local; + + #[test] + fn test_extract() { + let int_title = String::from("Intercepted with Jeremy Scahill"); + let int_url = String::from("https://feeds.feedburner.com/InterceptedWithJeremyScahill"); + let int_desc = + String::from( + "The people behind The Intercept’s fearless reporting and incisive \ + commentary—Jeremy Scahill, Glenn Greenwald, Betsy Reed and others—discuss the \ + crucial issues of our time: national security, civil liberties, foreign policy, \ + and criminal justice. Plus interviews with artists, thinkers, and newsmakers \ + who challenge our preconceptions about the world we live in.", + ); + + let dec_title = String::from("Deconstructed with Mehdi Hasan"); + let dec_url = String::from("https://rss.prod.firstlook.media/deconstructed/podcast.rss"); + let dec_desc = String::from( + "Journalist Mehdi Hasan is known around the world for his televised takedowns of \ + presidents and prime ministers. In this new podcast from The Intercept, Mehdi \ + unpacks a game-changing news event of the week while challenging the conventional \ + wisdom. 
As a Brit, a Muslim and an immigrant based in Donald Trump's Washington \ + D.C., Mehdi gives a refreshingly provocative perspective on the ups and downs of \ + American—and global—politics.", + ); + + #[cfg_attr(rustfmt, rustfmt_skip)] + let sample1 = format!( + " \ + \ + \ + Test OPML File \ + {} \ + http://www.opml.org/spec2 \ + \ + \ + \ + \ + \ + ", + Local::now().format("%a, %d %b %Y %T %Z"), + int_title, + int_desc, + int_url, + dec_title, + dec_desc, + dec_url, + ); + + let map = hashset![ + Opml { + title: int_title, + description: int_desc, + url: int_url + }, + Opml { + title: dec_title, + description: dec_desc, + url: dec_url + }, + ]; + assert_eq!(extract_sources(sample1.as_bytes()).unwrap(), map); + } +}