diff --git a/rust/Cargo.lock b/rust/Cargo.lock index d1460b0..585c3ba 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -97,6 +97,17 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "bstr" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -441,9 +452,19 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] [[package]] name = "memmap2" @@ -975,10 +996,22 @@ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" name = "rust" version = "0.1.0" dependencies = [ + "bstr", + "fast-float", "hashbrown", + "memchr", + "memmap", "polars", + "rayon", + "rustc-hash", ] +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustversion" version = "1.0.15" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 682f426..af4ec78 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,5 +6,14 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bstr = "1.9.1" +fast-float 
= "0.2.0" hashbrown = "0.14.3" +memchr = "2.7.4" +memmap = "0.7.0" polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]} +rayon = "1.10.0" +rustc-hash = "2.0.0" + +# NOTE: `[build] rustflags` is a Cargo config key and is ignored in Cargo.toml; put it in .cargo/config.toml as: +# [build] rustflags = ["-C", "target-cpu=native"] diff --git a/rust/src/bin/multi_threaded.rs b/rust/src/bin/multi_threaded.rs index 1d07386..519b99e 100644 --- a/rust/src/bin/multi_threaded.rs +++ b/rust/src/bin/multi_threaded.rs @@ -15,9 +15,11 @@ struct StationMeasurements { sum: f64, } +const DEFAULT_HASHMAP_LENGTH: usize = 10000; + fn main() { let stations: Arc>> = - Arc::new(Mutex::new(HashMap::new())); + Arc::new(Mutex::new(HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH))); let cores: usize = std::thread::available_parallelism().unwrap().into(); @@ -29,7 +31,8 @@ fn main() { let line_chunk = reader.lines().skip(chunk_length * i).take(chunk_length); let stations_clone = stations.clone(); let handle = thread::spawn(move || { - let mut t_stations: HashMap = HashMap::new(); + let mut t_stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let mut line_num = 0; for line in line_chunk { line_num += 1; diff --git a/rust/src/bin/referenceImpl.rs b/rust/src/bin/referenceImpl.rs new file mode 100644 index 0000000..5569fc9 --- /dev/null +++ b/rust/src/bin/referenceImpl.rs @@ -0,0 +1,120 @@ +use bstr::{BStr, ByteSlice}; +use memmap::MmapOptions; +use rustc_hash::FxHashMap as HashMap; +use std::{fmt::Display, fs::File}; + +use rayon::prelude::*; + +/// Running min/max/count/sum aggregate for a single station name. +#[derive(Debug)] +struct State { + min: f64, + max: f64, + count: u64, + sum: f64, +} + +// Starts min at f64::MAX and max at f64::MIN so the first `update`/`merge` overwrites both. +impl Default for State { + fn default() -> Self { + Self { + min: f64::MAX, + max: f64::MIN, + count: 0, + sum: 0.0, + } + } +} + +// Renders as `min/avg/max`, each with one decimal place; avg is sum / count. +impl Display for State { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let avg = self.sum / (self.count as f64); + write!(f, "{:.1}/{avg:.1}/{:.1}", self.min, self.max) + } +} + +impl State { + /// Folds a single measurement `v` into the aggregate. + fn update(&mut self, v: f64) { + self.min = self.min.min(v); + self.max = self.max.max(v); 
+ self.count += 1; + self.sum += v; + } + + fn merge(&mut self, other: &Self) { + self.min = self.min.min(other.min); + self.max = self.max.max(other.max); + self.count += other.count; + self.sum += other.sum; + } +} + +fn make_map<'a>(i: impl Iterator<Item = &'a [u8]>) -> HashMap<&'a BStr, State> { + let mut state: HashMap<&'a BStr, State> = Default::default(); + for line in i { + let (name, value) = line.split_once_str(&[b';']).unwrap(); + let value = fast_float::parse(value).unwrap(); + state.entry(name.into()).or_default().update(value); + } + state +} + +fn solve_for_part((start, end): (usize, usize), mem: &[u8]) -> HashMap<&BStr, State> { + make_map((&mem[start..end]).lines()) +} + +fn merge<'a>(a: &mut HashMap<&'a BStr, State>, b: &HashMap<&'a BStr, State>) { + for (k, v) in b { + a.entry(k).or_default().merge(v); + } +} + +fn main() { + let cores: usize = std::thread::available_parallelism().unwrap().into(); + let path = match std::env::args().nth(1) { + Some(path) => path, + None => "measurements.txt".to_owned(), + }; + let file = File::open(path).unwrap(); + let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; // SAFETY: assumes the input file is not truncated or modified while it is mapped + + let chunk_size = mmap.len() / cores; + let mut chunks: Vec<(usize, usize)> = vec![]; + let mut start = 0; + for _ in 0..cores { + let end = (start + chunk_size).min(mmap.len()); + let next_new_line = match memchr::memchr(b'\n', &mmap[end..]) { + Some(v) => v, + None => { + // No newline left: the final line is unterminated, so extend the chunk to end of file. + mmap.len() - end + } + }; + let end = end + next_new_line; + chunks.push((start, end)); + start = (end + 1).min(mmap.len()); // clamp: once the data is exhausted the remaining chunks are empty ranges, never start > end + } + let parts: Vec<_> = chunks + .par_iter() + .map(|r| solve_for_part(*r, &mmap)) + .collect(); + + let state: HashMap<&BStr, State> = parts.into_iter().fold(Default::default(), |mut a, b| { + merge(&mut a, &b); + a + }); + + let mut all: Vec<_> = state.into_iter().collect(); + all.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + print!("{{"); + for (i, (name, state)) in all.into_iter().enumerate() { + if i == 0 { + print!("{name}={state}"); + } else { +
print!(", {name}={state}"); + } + } + println!("}}"); +} diff --git a/rust/src/bin/single_thread.rs b/rust/src/bin/single_thread.rs index 4f07e84..d29546b 100644 --- a/rust/src/bin/single_thread.rs +++ b/rust/src/bin/single_thread.rs @@ -11,8 +11,11 @@ struct StationMeasurements { temps: Vec, } +const DEFAULT_HASHMAP_LENGTH: usize = 10000; + fn main() { - let mut stations: HashMap = HashMap::new(); + let mut stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let file = File::open("../measurements.txt").expect("File measurements.txt not found"); let reader = BufReader::new(file);