From a286014a62b9e18675913420a4ce15878c4b45e3 Mon Sep 17 00:00:00 2001 From: Jordan Petridis Date: Tue, 14 Nov 2017 17:56:28 +0200 Subject: [PATCH] Refactored indexing functions of the Diesel models. --- Cargo.lock | 14 ++++- hammond-data/Cargo.toml | 2 +- hammond-data/benches/bench.rs | 96 +++++++++++++++++++--------------- hammond-data/src/index_feed.rs | 49 ++++++++--------- hammond-downloader/Cargo.toml | 2 +- 5 files changed, 88 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f944f9..2b59315 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -250,6 +250,15 @@ dependencies = [ "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "diesel" +version = "0.16.0" +source = "git+https://github.com/diesel-rs/diesel.git#07f80c3a0d07daa26efff3166fbf0297dc0f0a7b" +dependencies = [ + "byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libsqlite3-sys 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "diesel" version = "0.16.0" @@ -538,7 +547,7 @@ name = "hammond-data" version = "0.1.0" dependencies = [ "chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)", "diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", "dotenv 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -557,7 +566,7 @@ dependencies = [ name = "hammond-downloader" version = "0.1.0" dependencies = [ - "diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)", "error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "hammond-data 0.1.0", "hyper 0.11.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ 
-1559,6 +1568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum derive-error-chain 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3c9ca9ade651388daad7c993f005d0d20c4f6fe78c1cdc93e95f161c6f5ede4a" "checksum derive_builder 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "03600ae366b6eb2314e54d62adc833d9866da03798acc61c61789654ceaa227a" "checksum derive_builder_core 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eed37eae64daa5511467b1a55cebdf472deeaef108d22f62f25e8bbcaffd56ac" +"checksum diesel 0.16.0 (git+https://github.com/diesel-rs/diesel.git)" = "" "checksum diesel 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "304226fa7a3982b0405f6bb95dd9c10c3e2000709f194038a60ec2c277150951" "checksum diesel_codegen 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18a42ca5c9b660add51d58bc5a50a87123380e1e458069c5504528a851ed7384" "checksum diesel_infer_schema 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bf1957ff5cd3b04772e43c162c2f69c2aa918080ff9b020276792d236be8be52" diff --git a/hammond-data/Cargo.toml b/hammond-data/Cargo.toml index cba7bdb..4bf716e 100644 --- a/hammond-data/Cargo.toml +++ b/hammond-data/Cargo.toml @@ -18,7 +18,7 @@ xdg = "2.1.0" [dependencies.diesel] features = ["sqlite"] -version = "0.16.0" +git = "https://github.com/diesel-rs/diesel.git" [dependencies.diesel_codegen] features = ["sqlite"] diff --git a/hammond-data/benches/bench.rs b/hammond-data/benches/bench.rs index f69126d..be88041 100644 --- a/hammond-data/benches/bench.rs +++ b/hammond-data/benches/bench.rs @@ -15,15 +15,29 @@ use rand::Rng; use test::Bencher; use hammond_data::run_migration_on; -use hammond_data::index_feed::{complete_index, insert_return_source}; +use hammond_data::index_feed::{complete_index, insert_return_source, Database}; -use std::io::BufReader; +// use std::io::BufRead; use std::path::PathBuf; use std::sync::{Arc, Mutex}; -use std::fs; 
struct TempDB(tempdir::TempDir, PathBuf, SqliteConnection); +// Big rss feed +const PCPER: &[u8] = include_bytes!("feeds/pcpermp3.xml"); +const UNPLUGGED: &[u8] = include_bytes!("feeds/linuxunplugged.xml"); +const RADIO: &[u8] = include_bytes!("feeds/coderradiomp3.xml"); +const SNAP: &[u8] = include_bytes!("feeds/techsnapmp3.xml"); +const LAS: &[u8] = include_bytes!("feeds/TheLinuxActionShow.xml"); + +static URLS: &[(&[u8], &str)] = &[ + (PCPER, "https://www.pcper.com/rss/podcasts-mp3.rss"), + (UNPLUGGED, "http://feeds.feedburner.com/linuxunplugged"), + (RADIO, "https://feeds.feedburner.com/coderradiomp3"), + (SNAP, "https://feeds.feedburner.com/techsnapmp3"), + (LAS, "https://feeds2.feedburner.com/TheLinuxActionShow"), +]; + /// Create and return a Temporary DB. /// Will be destroed once the returned variable(s) is dropped. fn get_temp_db() -> TempDB { @@ -40,46 +54,42 @@ fn get_temp_db() -> TempDB { TempDB(tmp_dir, db_path, db) } -#[bench] -fn bench_index_test_files(b: &mut Bencher) { - let TempDB(_tmp_dir, _db_path, db) = get_temp_db(); - // complete_index runs in parallel so it requires a mutex as argument. - let m = Arc::new(Mutex::new(db)); +fn index_urls(m: &Database) { + URLS.par_iter().for_each(|&(buff, url)| { + // Create and insert a Source into db + let s = { + let temp = m.lock().unwrap(); + insert_return_source(&temp, url).unwrap() + }; + // parse it into a channel + let chan = rss::Channel::read_from(buff).unwrap(); - // include them in the binary to avoid loading from disk making file open syscalls. - let pcper = include_bytes!("feeds/pcpermp3.xml"); - let unplugged = include_bytes!("feeds/linuxunplugged.xml"); - let radio = include_bytes!("feeds/coderradiomp3.xml"); - let snap = include_bytes!("feeds/techsnapmp3.xml"); - let las = include_bytes!("feeds/TheLinuxActionShow.xml"); - - // vec of (&vec, url) tuples. 
- let urls = vec![ - (pcper.as_ref(), "https://www.pcper.com/rss/podcasts-mp3.rss"), - ( - unplugged.as_ref(), - "http://feeds.feedburner.com/linuxunplugged", - ), - (radio.as_ref(), "https://feeds.feedburner.com/coderradiomp3"), - (snap.as_ref(), "https://feeds.feedburner.com/techsnapmp3"), - ( - las.as_ref(), - "https://feeds2.feedburner.com/TheLinuxActionShow", - ), - ]; - - b.iter(|| { - urls.par_iter().for_each(|&(buff, url)| { - // Create and insert a Source into db - let s = { - let temp = m.lock().unwrap(); - insert_return_source(&temp, url).unwrap() - }; - // parse it into a channel - let chan = rss::Channel::read_from(buff).unwrap(); - - // Index the channel - complete_index(&m, &chan, &s).unwrap(); - }); + // Index the channel + complete_index(m, &chan, &s).unwrap(); + }); +} + +#[bench] +fn bench_index_feeds(b: &mut Bencher) { + let TempDB(_tmp_dir, _db_path, db) = get_temp_db(); + let m = Arc::new(Mutex::new(db)); + + b.iter(|| { + index_urls(&Arc::clone(&m)); + }); +} + +#[bench] +fn bench_index_unchanged_feeds(b: &mut Bencher) { + let TempDB(_tmp_dir, _db_path, db) = get_temp_db(); + let m = Arc::new(Mutex::new(db)); + + // Index first so it will only bench the comparison test case. 
+ index_urls(&Arc::clone(&m)); + + b.iter(|| { + for _ in 0..10 { + index_urls(&Arc::clone(&m)); + } }); } diff --git a/hammond-data/src/index_feed.rs b/hammond-data/src/index_feed.rs index ac36a6d..b8e03a5 100644 --- a/hammond-data/src/index_feed.rs +++ b/hammond-data/src/index_feed.rs @@ -4,7 +4,6 @@ use rss; use reqwest; use rayon::prelude::*; -use schema; use dbqueries; use models::*; use errors::*; @@ -17,47 +16,41 @@ pub struct Feed(pub reqwest::Response, pub Source); pub type Database = Arc>; -fn index_source(con: &SqliteConnection, foo: &NewSource) -> QueryResult { - match dbqueries::get_source_from_uri(con, foo.uri) { - Ok(_) => Ok(1), - Err(_) => diesel::insert(foo).into(schema::source::table).execute(con), - } +fn index_source(con: &SqliteConnection, foo: &NewSource) { + use schema::source::dsl::*; + + // Throw away the result like `insert or ignore` + // Diesel does not support `insert or ignore` yet. + let _ = diesel::insert_into(source).values(foo).execute(con); } fn index_podcast(con: &SqliteConnection, pd: &NewPodcast) -> Result<()> { + use schema::podcast::dsl::*; + match dbqueries::get_podcast_from_title(con, &pd.title) { - Ok(mut foo) => if foo.link() != pd.link || foo.description() != pd.description { - foo.set_link(&pd.link); - foo.set_description(&pd.description); - foo.set_image_uri(pd.image_uri.as_ref().map(|s| s.as_str())); - foo.save_changes::(con)?; + Ok(foo) => if foo.link() != pd.link || foo.description() != pd.description { + diesel::replace_into(podcast).values(pd).execute(con)?; }, Err(_) => { - diesel::insert(pd) - .into(schema::podcast::table) - .execute(con)?; + diesel::insert_into(podcast).values(pd).execute(con)?; } } Ok(()) } -fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> Result<()> { +// TODO: Currently using diesel from master git. +// Watch out for v0.99.0 beta and change the toml. 
+fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> QueryResult<()> { + use schema::episode::dsl::*; + match dbqueries::get_episode_from_uri(con, ep.uri.unwrap()) { - Ok(mut foo) => if foo.title() != ep.title + Ok(foo) => if foo.title() != ep.title || foo.published_date() != ep.published_date.as_ref().map(|x| x.as_str()) { - foo.set_title(ep.title); - foo.set_description(ep.description); - foo.set_published_date(ep.published_date.as_ref().map(|x| x.as_str())); - foo.set_guid(ep.guid); - foo.set_length(ep.length); - foo.set_epoch(ep.epoch); - foo.save_changes::(con)?; + diesel::replace_into(episode).values(ep).execute(con)?; }, Err(_) => { - diesel::insert(ep) - .into(schema::episode::table) - .execute(con)?; + diesel::insert_into(episode).values(ep).execute(con)?; } } Ok(()) @@ -65,7 +58,7 @@ fn index_episode(con: &SqliteConnection, ep: &NewEpisode) -> Result<()> { pub fn insert_return_source(con: &SqliteConnection, url: &str) -> Result { let foo = NewSource::new_with_uri(url); - index_source(con, &foo)?; + index_source(con, &foo); Ok(dbqueries::get_source_from_uri(con, foo.uri)?) } @@ -270,7 +263,7 @@ mod tests { inpt.iter().for_each(|feed| { let tempdb = db.lock().unwrap(); - index_source(&tempdb, &NewSource::new_with_uri(feed)).unwrap(); + index_source(&tempdb, &NewSource::new_with_uri(feed)); }); full_index_loop(&db).unwrap(); diff --git a/hammond-downloader/Cargo.toml b/hammond-downloader/Cargo.toml index f6347d3..14dc21d 100644 --- a/hammond-downloader/Cargo.toml +++ b/hammond-downloader/Cargo.toml @@ -15,7 +15,7 @@ tempdir = "0.3.5" [dependencies.diesel] features = ["sqlite"] -version = "0.16.0" +git = "https://github.com/diesel-rs/diesel.git" [dependencies.hammond-data] path = "../hammond-data"