From ac5c45f8d54f48dfab7668f1fea6358e55c3eb70 Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Wed, 28 Aug 2024 08:52:40 +0200 Subject: [PATCH] fxhashmap faster afterall... --- src/main/rust/src/implementations/libraries.rs | 11 ++++++----- .../rust/src/implementations/multi_threaded.rs | 4 ++-- .../src/implementations/multi_threaded_smol.rs | 4 ++-- src/main/rust/src/implementations/single_thread.rs | 2 +- src/main/rust/src/implementations/smol.rs | 2 +- src/main/rust/src/utils/hash.rs | 14 ++++++++++---- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/main/rust/src/implementations/libraries.rs b/src/main/rust/src/implementations/libraries.rs index 209328a..6bf54f2 100644 --- a/src/main/rust/src/implementations/libraries.rs +++ b/src/main/rust/src/implementations/libraries.rs @@ -1,10 +1,10 @@ use crate::models::station_measurements::StationMeasurements; use crate::utils::{hash, parse}; use memmap2::MmapOptions; -use std::collections::HashMap; use std::sync::mpsc; use std::time::Instant; use std::{fs::File, thread}; +use rustc_hash::{FxHashMap as HashMap, FxBuildHasher}; const DEFAULT_HASHMAP_LENGTH: usize = 10000; @@ -14,8 +14,9 @@ pub fn run() { let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; let file_length = mmap.len(); - let mut stations: HashMap = - HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); + let hasher = FxBuildHasher::default(); + let mut stations: HashMap = + HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); let (tx, rx) = mpsc::channel(); let cores = thread::available_parallelism().unwrap().into(); let chunk_length = file_length / cores; @@ -40,8 +41,8 @@ pub fn run() { let (start, end) = *bounds.get(i).unwrap(); let mmap_slice = &mmap[start..end]; s.spawn(move || { - let mut t_stations: HashMap = - HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); + let mut t_stations: HashMap = + HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); for line in mmap_slice.split(|&byte| byte == b'\n') { if line.len() == 0 { break; diff --git a/src/main/rust/src/implementations/multi_threaded.rs b/src/main/rust/src/implementations/multi_threaded.rs index 107fce0..78ea9d8 100644 --- a/src/main/rust/src/implementations/multi_threaded.rs +++ b/src/main/rust/src/implementations/multi_threaded.rs @@ -12,7 +12,7 @@ pub fn run() { const FILE_PATH: &str = "../../../measurements.txt"; let now = Instant::now(); thread::scope(|s| { - let mut stations: HashMap = + let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let (tx, rx) = mpsc::channel(); let cores = thread::available_parallelism().unwrap().into(); @@ -44,7 +44,7 @@ pub fn run() { let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let mut reader = BufReader::new(&file); reader.seek(SeekFrom::Start(currposition)).unwrap(); - let mut t_stations: HashMap = + let mut t_stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let mut line = Vec::with_capacity(108); loop { diff --git a/src/main/rust/src/implementations/multi_threaded_smol.rs b/src/main/rust/src/implementations/multi_threaded_smol.rs index 9625d71..d4ac627 100644 --- a/src/main/rust/src/implementations/multi_threaded_smol.rs +++ b/src/main/rust/src/implementations/multi_threaded_smol.rs @@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; pub fn run() { const FILE_PATH: &str = "../../../measurements.txt"; let now = Instant::now(); - let mut stations: HashMap = + let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let cores = thread::available_parallelism().unwrap().into(); let bounds = smol::block_on(async { @@ -53,7 +53,7 @@ pub fn run() { .expect("File measurements.txt not found"); let mut reader = BufReader::new(&mut file); reader.seek(SeekFrom::Start(currposition)).await.unwrap(); - let mut t_stations: HashMap = + let mut t_stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let mut line = Vec::with_capacity(108); loop { diff --git a/src/main/rust/src/implementations/single_thread.rs b/src/main/rust/src/implementations/single_thread.rs index eb6875a..3866702 100644 --- a/src/main/rust/src/implementations/single_thread.rs +++ b/src/main/rust/src/implementations/single_thread.rs @@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; pub fn run() { let now = Instant::now(); - let mut stations: HashMap = + let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); diff --git a/src/main/rust/src/implementations/smol.rs b/src/main/rust/src/implementations/smol.rs index 22f9b45..dc0fb34 100644 --- a/src/main/rust/src/implementations/smol.rs +++ b/src/main/rust/src/implementations/smol.rs @@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; pub fn run() { let now = Instant::now(); - let mut stations: HashMap = + let mut stations: HashMap = HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); smol::block_on(async { diff --git a/src/main/rust/src/utils/hash.rs b/src/main/rust/src/utils/hash.rs index 7a26a42..cc6ae73 100644 --- a/src/main/rust/src/utils/hash.rs +++ b/src/main/rust/src/utils/hash.rs @@ -1,9 +1,15 @@ #[inline] -pub fn bytes(bytes: &[u8]) -> usize { - let mut hash: usize = 0; +pub fn bytes(bytes: &[u8]) -> u64 { + // hash from https://curiouscoding.nl/posts/1brc/ still wrong for measurements3.txt (and slower?) + //let mut key = [0u8; 8]; + //let l = bytes.len().min(8); + //key[..l].copy_from_slice(&bytes[..l]); + //key[0] ^= bytes.len() as u8; + //u64::from_ne_bytes(key) + let mut hash: u64 = 0; let (chunks, remainder) = bytes.as_chunks::<8>(); for &chunk in chunks { - hash = hash.wrapping_add(usize::from_be_bytes(chunk)); + hash = hash.wrapping_add(u64::from_be_bytes(chunk)); } let mut r = [0_u8; 8]; r[0] = remainder.len() as u8; @@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize { r[idx] = byte; idx += 1; } - hash += usize::from_be_bytes(r); + hash += u64::from_be_bytes(r); hash }