From 608cbb59e5acca8aad5ad9c2bae9f38b2fb2c1f9 Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Tue, 27 Aug 2024 10:57:23 +0200 Subject: [PATCH] managed to get faster again by searching hashmap using &str and only converting to String on insertion. Removed FxHashMap again --- .../rust/src/implementations/libraries.rs | 31 ++++++++++--------- .../src/implementations/multi_threaded.rs | 4 +-- .../implementations/multi_threaded_smol.rs | 31 ++++++++++--------- .../rust/src/implementations/single_thread.rs | 6 ++-- src/main/rust/src/implementations/smol.rs | 4 +-- 5 files changed, 39 insertions(+), 37 deletions(-) diff --git a/src/main/rust/src/implementations/libraries.rs b/src/main/rust/src/implementations/libraries.rs index 3285766..37b7d73 100644 --- a/src/main/rust/src/implementations/libraries.rs +++ b/src/main/rust/src/implementations/libraries.rs @@ -1,10 +1,11 @@ use crate::models::station_measurements::StationMeasurements; use crate::utils::parse; use memmap2::MmapOptions; -use rustc_hash::{FxBuildHasher, FxHashMap as HashMap}; use std::sync::mpsc; use std::time::Instant; use std::{fs::File, thread}; +use std::collections::HashMap; +use crate::utils::parse::hashstr; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -14,9 +15,8 @@ pub fn run() { let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; let file_length = mmap.len(); - let hasher = FxBuildHasher::default(); - let mut stations: HashMap = - HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); + let mut stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let (tx, rx) = mpsc::channel(); let cores = thread::available_parallelism().unwrap().into(); let chunk_length = file_length / cores; @@ -41,17 +41,18 @@ pub fn run() { let (start, end) = *bounds.get(i).unwrap(); let mmap_slice = &mmap[start..end]; s.spawn(move || { - let mut t_stations: HashMap = - HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); + let mut t_stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); for line in mmap_slice.split(|&byte| { byte == b'\n' }) { if line.len() == 0 { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let hash = hashstr(station); + let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp); - let measurements_option = t_stations.get_mut(&station); - if let Some(measurements) = measurements_option { + let measurements_option = t_stations.get_mut(&hash); + if let Some((_, measurements)) = measurements_option { measurements.update(temp); } else { let measurements = StationMeasurements { @@ -60,7 +61,7 @@ pub fn run() { count: 1, sum: temp, }; - t_stations.insert(station, measurements); + t_stations.insert(hash, (station.to_string(), measurements)); } } let _ = tx.send(t_stations); @@ -68,18 +69,18 @@ pub fn run() { } drop(tx); while let Ok(t_stations) = rx.recv() { - for (station, measurements) in t_stations.iter() { - let joined_measurements_options = stations.get_mut(station); - if let Some(joined_measurements) = joined_measurements_options { + for (hash, (station, measurements)) in t_stations.iter() { + let joined_measurements_options = stations.get_mut(hash); + if let Some((_, joined_measurements)) = joined_measurements_options { joined_measurements.merge(measurements); } else { - stations.insert(station.to_owned(), *measurements); + stations.insert(*hash, (station.to_owned(), *measurements)); } } } let mut stations: Vec = stations .iter() - .map(|(station, measurements)| { + .map(|(_, (station, measurements))| { let measurements = measurements.to_string(); #[cfg(feature = "json")] { diff --git a/src/main/rust/src/implementations/multi_threaded.rs b/src/main/rust/src/implementations/multi_threaded.rs index 109743f..66cd623 100644 --- a/src/main/rust/src/implementations/multi_threaded.rs +++ b/src/main/rust/src/implementations/multi_threaded.rs @@ -57,7 +57,7 @@ pub fn run() { } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let hash = hashstr(station); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = t_stations.get_mut(&hash); if let Some((_, measurements)) = measurements_option { @@ -69,7 +69,7 @@ pub fn run() { count: 1, sum: temp, }; - t_stations.insert(hash, (station, measurements)); + t_stations.insert(hash, (station.to_string(), measurements)); } currposition += line_len as u64; if currposition >= end { diff --git a/src/main/rust/src/implementations/multi_threaded_smol.rs b/src/main/rust/src/implementations/multi_threaded_smol.rs index 914df71..6d0b4b9 100644 --- a/src/main/rust/src/implementations/multi_threaded_smol.rs +++ b/src/main/rust/src/implementations/multi_threaded_smol.rs @@ -1,12 +1,13 @@ +use std::collections::HashMap; use smol::fs::File; use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; -use rustc_hash::{FxHashMap as HashMap, FxBuildHasher}; use crate::models::station_measurements::StationMeasurements; use crate::utils::parse; use std::sync::mpsc; use std::thread; use std::time::Instant; +use crate::utils::parse::hashstr; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -14,9 +15,8 @@ pub fn run() { const FILE_PATH: &str = "../../../measurements.txt"; let now = Instant::now(); thread::scope(|s| { - let hasher = FxBuildHasher::default(); - let mut stations: HashMap = - HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); + let mut stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let (tx, rx) = mpsc::channel(); let cores = thread::available_parallelism().unwrap().into(); let bounds = smol::block_on(async { @@ -57,8 +57,8 @@ pub fn run() { .expect("File measurements.txt not found"); let mut reader = BufReader::new(&mut file); reader.seek(SeekFrom::Start(currposition)).await.unwrap(); - let mut t_stations: HashMap = - HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); + let mut t_stations: HashMap = + HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let mut line = Vec::with_capacity(108); loop { let line_len = reader @@ -69,10 +69,11 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let hash = hashstr(station); + let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); - let measurements_option = t_stations.get_mut(&station); - if let Some(measurements) = measurements_option { + let measurements_option = t_stations.get_mut(&hash); + if let Some((_, measurements)) = measurements_option { measurements.update(temp); } else { let measurements = StationMeasurements { @@ -81,7 +82,7 @@ pub fn run() { count: 1, sum: temp, }; - t_stations.insert(station, measurements); + t_stations.insert(hash, (station.to_string(), measurements)); } currposition += line_len as u64; if currposition >= end { @@ -95,18 +96,18 @@ pub fn run() { } drop(tx); while let Ok(t_stations) = rx.recv() { - for (station, measurements) in t_stations.iter() { - let joined_measurements_options = stations.get_mut(station); - if let Some(joined_measurements) = joined_measurements_options { + for (hash, (station, measurements)) in t_stations.iter() { + let joined_measurements_options = stations.get_mut(hash); + if let Some((_, joined_measurements)) = joined_measurements_options { joined_measurements.merge(measurements); } else { - stations.insert(station.to_owned(), *measurements); + stations.insert(*hash, (station.to_owned(), *measurements)); } } } let mut stations: Vec = stations .iter() - .map(|(station, measurements)| { + .map(|(_, (station, measurements))| { let measurements = measurements.to_string(); #[cfg(feature = "json")] { diff --git a/src/main/rust/src/implementations/single_thread.rs b/src/main/rust/src/implementations/single_thread.rs index 2367974..fcc90d9 100644 --- a/src/main/rust/src/implementations/single_thread.rs +++ b/src/main/rust/src/implementations/single_thread.rs @@ -25,7 +25,7 @@ pub fn run() { } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let hash = hashstr(station); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = stations.get_mut(&hash); if let Some((_, measurements)) = measurements_option { @@ -37,7 +37,7 @@ pub fn run() { count: 1, sum: temp, }; - stations.insert(hash, (station, measurements)); + stations.insert(hash, (station.to_string(), measurements)); } line.clear(); } @@ -51,5 +51,5 @@ pub fn run() { stations.sort(); let stations = stations.join(","); println!("{{{stations}}}"); - println!("Time={} μs", now.elapsed().as_micros()); + println!("Time={} ms", now.elapsed().as_millis()); } diff --git a/src/main/rust/src/implementations/smol.rs b/src/main/rust/src/implementations/smol.rs index cd0e887..27f26c5 100644 --- a/src/main/rust/src/implementations/smol.rs +++ b/src/main/rust/src/implementations/smol.rs @@ -30,7 +30,7 @@ pub fn run() { } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); let hash = hashstr(station); - let station = unsafe { String::from_utf8_unchecked(Vec::from(station)) }; + let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = stations.get_mut(&hash); if let Some((_, measurements)) = measurements_option { @@ -42,7 +42,7 @@ pub fn run() { count: 1, sum: temp, }; - stations.insert(hash, (station, measurements)); + stations.insert(hash, (station.to_string(), measurements)); } line.clear(); }