h-data: Implement a tail-recursion loop to follow redirects.

Follow HTTP 301 permanent redirects by using futures::future::loop_fn.
It's kind of funky: match_status still returns status codes as errors,
and a new DataError variant had to be added to distinguish when we
should Loop::Continue. This could be cleaned up a lot.
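For context, the control flow this commit introduces can be sketched in isolation. Below is a minimal, self-contained example of a futures 0.1 loop_fn redirect loop, not the crate's actual code: Source, DataError::Redirect, and request here are hypothetical stand-ins for hammond's Source, DataError::F301, and request_constructor.

extern crate futures; // assumes futures = "0.1"

use futures::future::{self, loop_fn, Future, Loop};

// Hypothetical stand-ins for the crate's `Source` and `DataError::F301(Source)`.
#[derive(Debug)]
struct Source {
    uri: String,
    hops: u8,
}

#[derive(Debug)]
#[allow(dead_code)] // `Other` exists only so the catch-all arm below is meaningful.
enum DataError {
    Redirect(Source),
    Other(String),
}

// Fake request that reports a 301-style redirect twice, then succeeds.
fn request(mut s: Source) -> future::FutureResult<Source, DataError> {
    if s.hops < 2 {
        s.hops += 1;
        s.uri = format!("{}/moved", s.uri);
        future::err(DataError::Redirect(s))
    } else {
        future::ok(s)
    }
}

fn main() {
    let start = Source { uri: "http://example.com".into(), hops: 0 };
    // The tail-recursion loop: Loop::Continue feeds the updated Source back
    // into the closure, Loop::Break exits with the final value, and any
    // other error aborts the whole future.
    let fut = loop_fn(start, |source| {
        request(source).then(|res| match res {
            Ok(s) => Ok(Loop::Break(s)),
            Err(DataError::Redirect(s)) => Ok(Loop::Continue(s)),
            Err(e) => Err(e),
        })
    });
    match fut.wait() {
        Ok(s) => println!("settled on {}", s.uri),
        Err(e) => println!("failed: {:?}", e),
    }
}

In the real code below, the Break value is the (Source, Response) tuple returned by request_constructor, which the following and_then destructures to build the Feed.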
Jordan Petridis 2018-04-14 05:30:29 +03:00
parent 87421ce74d
commit c6a24e839a
4 changed files with 71 additions and 48 deletions


@@ -8,6 +8,26 @@ use url;
use std::io;
+use models::Source;
+#[fail(display = "Request to {} returned {}. Context: {}", url, status_code, context)]
+#[derive(Fail, Debug)]
+pub struct HttpStatusError {
+url: String,
+status_code: hyper::StatusCode,
+context: String,
+}
+impl HttpStatusError {
+pub fn new(url: String, code: hyper::StatusCode, context: String) -> Self {
+HttpStatusError {
+url,
+status_code: code,
+context,
+}
+}
+}
#[derive(Fail, Debug)]
pub enum DataError {
#[fail(display = "SQL Query failed: {}", _0)]
@@ -31,12 +51,10 @@ pub enum DataError {
RssError(#[cause] rss::Error),
#[fail(display = "Error: {}", _0)]
Bail(String),
#[fail(display = "Request to {} returned {}. Context: {}", url, status_code, context)]
HttpStatusError {
url: String,
status_code: hyper::StatusCode,
context: String,
},
#[fail(display = "{}", _0)]
HttpStatusGeneral(HttpStatusError),
#[fail(display = "FIXME: This should be better")]
F301(Source),
#[fail(display = "Error occured while Parsing an Episode. Reason: {}", reason)]
ParseEpisodeError { reason: String, parent_id: i32 },
#[fail(display = "No Futures where produced to be run.")]


@@ -10,10 +10,11 @@ use hyper::{Client, Method, Request, Response, StatusCode, Uri};
use hyper_tls::HttpsConnector;
// use futures::future::ok;
+use futures::future::{loop_fn, Future, Loop};
use futures::prelude::*;
use database::connection;
-use errors::DataError;
+use errors::*;
use feed::{Feed, FeedBuilder};
use models::{NewSource, Save};
use schema::source;
@@ -102,11 +103,7 @@ impl Source {
}
fn make_err(self, context: &str, code: StatusCode) -> DataError {
-DataError::HttpStatusError {
-url: self.uri,
-status_code: code,
-context: context.into(),
-}
+DataError::HttpStatusGeneral(HttpStatusError::new(self.uri, code, context.into()))
}
// TODO match on more stuff
@@ -128,7 +125,7 @@ impl Source {
StatusCode::MovedPermanently => {
error!("Feed was moved permanently.");
self.handle_301(&res)?;
-return Err(self.make_err("301: Feed was moved permanently.", code));
+return Err(DataError::F301(self));
}
StatusCode::TemporaryRedirect => debug!("307: Temporary Redirect."),
StatusCode::PermanentRedirect => warn!("308: Permanent Redirect."),
@@ -151,8 +148,6 @@ impl Source {
self.last_modified = None;
self.save()?;
info!("Feed url was updated succesfully.");
// TODO: Refresh in place instead of next time, Not a priority.
info!("New content will be fetched with the next refesh.");
}
Ok(())
@@ -177,12 +172,27 @@ impl Source {
// Refactor into TryInto once it lands on stable.
pub fn into_feed(
self,
-client: &Client<HttpsConnector<HttpConnector>>,
+client: Client<HttpsConnector<HttpConnector>>,
ignore_etags: bool,
) -> Box<Future<Item = Feed, Error = DataError>> {
let id = self.id();
-let feed = self.request_constructor(client, ignore_etags)
-.and_then(move |(_, res)| response_to_channel(res))
+let response = loop_fn(self, move |source| {
+source
+.request_constructor(client.clone(), ignore_etags)
+.then(|res| match res {
+Ok(s) => Ok(Loop::Break(s)),
+Err(err) => match err {
+DataError::F301(s) => {
+info!("Following redirect...");
+Ok(Loop::Continue(s))
+}
+e => Err(e),
+},
+})
+});
+let feed = response
+.and_then(|(_, res)| response_to_channel(res))
.and_then(move |chan| {
FeedBuilder::default()
.channel(chan)
@@ -198,7 +208,7 @@ impl Source {
// #bools_are_just_2variant_enums
fn request_constructor(
self,
-client: &Client<HttpsConnector<HttpConnector>>,
+client: Client<HttpsConnector<HttpConnector>>,
ignore_etags: bool,
) -> Box<Future<Item = (Self, Response), Error = DataError>> {
// FIXME: remove unwrap somehow
@@ -230,7 +240,6 @@ impl Source {
let work = client
.request(req)
.map_err(From::from)
-// TODO: tail recursion loop that would follow redirects directly
.and_then(move |res| self.match_status(res));
Box::new(work)
}
@@ -271,7 +280,7 @@ mod tests {
let source = Source::from_url(url).unwrap();
let id = source.id();
-let feed = source.into_feed(&client, true);
+let feed = source.into_feed(client, true);
let feed = core.run(feed).unwrap();
let expected = get_feed("tests/feeds/2018-01-20-Intercepted.xml", id);


@@ -19,6 +19,25 @@ use Source;
// use std::sync::{Arc, Mutex};
// http://gtk-rs.org/tuto/closures
+#[macro_export]
+macro_rules! clone {
+(@param _) => ( _ );
+(@param $x:ident) => ( $x );
+($($n:ident),+ => move || $body:expr) => (
+{
+$( let $n = $n.clone(); )+
+move || $body
+}
+);
+($($n:ident),+ => move |$($p:tt),+| $body:expr) => (
+{
+$( let $n = $n.clone(); )+
+move |$(clone!(@param $p),)+| $body
+}
+);
+}
/// The pipeline to be run for indexing and updating a Podcast feed that originates from
/// `Source.uri`.
///
@@ -33,7 +52,7 @@ pub fn pipeline<S: IntoIterator<Item = Source>>(
) -> Result<(), DataError> {
let list: Vec<_> = sources
.into_iter()
-.map(move |s| s.into_feed(&client, ignore_etags))
+.map(clone!(client => move |s| s.into_feed(client.clone(), ignore_etags)))
.map(|fut| fut.and_then(|feed| feed.index()))
.map(|fut| fut.map(|_| ()).map_err(|err| error!("Error: {}", err)))
.collect();
@@ -73,7 +92,7 @@ pub fn index_single_source(s: Source, ignore_etags: bool) -> Result<(), DataError> {
.connector(HttpsConnector::new(num_cpus::get(), &handle)?)
.build(&handle);
-let work = s.into_feed(&client, ignore_etags)
+let work = s.into_feed(client, ignore_etags)
.and_then(move |feed| feed.index())
.map(|_| ());


@@ -140,34 +140,11 @@ fn refresh_feed(source: Option<Vec<Source>>, sender: Sender<Action>) -> Result<(
sender.send(Action::HeaderBarShowUpdateIndicator)?;
rayon::spawn(move || {
-let mut sources = source.unwrap_or_else(|| {
+let sources = source.unwrap_or_else(|| {
dbqueries::get_sources().expect("Failed to retrieve Sources from the database.")
});
-// Work around to improve the feed addition experience.
-// Many times links to rss feeds are just redirects (usually to an https
-// version). Sadly I haven't yet figured out a nice way to follow
-// redirects without getting into lifetime hell with futures and hyper.
-// So if the requested refresh is only of 1 feed, and the feed fails to be indexed
-// (as a 301 redirect would update the source entry and exit), another refresh is
-// run. For more see hammond_data/src/models/source.rs `fn
-// request_constructor`. Also ping me on irc or open an issue if you
-// want to tackle it.
-if sources.len() == 1 {
-let source = sources.remove(0);
-let id = source.id();
-if let Err(err) = pipeline::index_single_source(source, false) {
-error!("Error While trying to update the database.");
-error!("Error msg: {}", err);
-if let Ok(source) = dbqueries::get_source_from_id(id) {
-if let Err(err) = pipeline::index_single_source(source, false) {
-error!("Error While trying to update the database.");
-error!("Error msg: {}", err);
-}
-}
-}
-// This is what would normally run
-} else if let Err(err) = pipeline::run(sources, false) {
+if let Err(err) = pipeline::run(sources, false) {
error!("Error While trying to update the database.");
error!("Error msg: {}", err);
}