From b8f589096f442f1dde7f31b0ae8a4ada35ef15d0 Mon Sep 17 00:00:00 2001 From: Fabian Schmidt Date: Tue, 27 Aug 2024 13:54:23 +0200 Subject: [PATCH] extract hash into own module --- .../rust/src/implementations/libraries.rs | 5 ++- .../src/implementations/multi_threaded.rs | 5 ++- .../implementations/multi_threaded_smol.rs | 5 ++- .../rust/src/implementations/single_thread.rs | 5 ++- src/main/rust/src/implementations/smol.rs | 5 ++- src/main/rust/src/utils.rs | 1 + src/main/rust/src/utils/hash.rs | 33 +++++++++++++++++++ src/main/rust/src/utils/parse.rs | 31 +---------------- 8 files changed, 45 insertions(+), 45 deletions(-) create mode 100644 src/main/rust/src/utils/hash.rs diff --git a/src/main/rust/src/implementations/libraries.rs b/src/main/rust/src/implementations/libraries.rs index bac457f..209328a 100644 --- a/src/main/rust/src/implementations/libraries.rs +++ b/src/main/rust/src/implementations/libraries.rs @@ -1,6 +1,5 @@ use crate::models::station_measurements::StationMeasurements; -use crate::utils::parse; -use crate::utils::parse::hashstr; +use crate::utils::{hash, parse}; use memmap2::MmapOptions; use std::collections::HashMap; use std::sync::mpsc; @@ -48,7 +47,7 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let hash = hashstr(station); + let hash = hash::bytes(station); let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp); let measurements_option = t_stations.get_mut(&hash); diff --git a/src/main/rust/src/implementations/multi_threaded.rs b/src/main/rust/src/implementations/multi_threaded.rs index 15f95d7..107fce0 100644 --- a/src/main/rust/src/implementations/multi_threaded.rs +++ b/src/main/rust/src/implementations/multi_threaded.rs @@ -1,6 +1,5 @@ use crate::models::station_measurements::StationMeasurements; -use crate::utils::parse; -use crate::utils::parse::hashstr; +use crate::utils::{hash, parse}; use std::collections::HashMap; use std::io::{BufRead, 
Seek, SeekFrom}; use std::sync::mpsc; @@ -56,7 +55,7 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let hash = hashstr(station); + let hash = hash::bytes(station); let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = t_stations.get_mut(&hash); diff --git a/src/main/rust/src/implementations/multi_threaded_smol.rs b/src/main/rust/src/implementations/multi_threaded_smol.rs index 4f0f3bd..9625d71 100644 --- a/src/main/rust/src/implementations/multi_threaded_smol.rs +++ b/src/main/rust/src/implementations/multi_threaded_smol.rs @@ -3,8 +3,7 @@ use smol::io::{AsyncBufReadExt, AsyncSeekExt, BufReader, SeekFrom}; use std::collections::HashMap; use crate::models::station_measurements::StationMeasurements; -use crate::utils::parse; -use crate::utils::parse::hashstr; +use crate::utils::{hash, parse}; use easy_parallel::Parallel; use std::thread; use std::time::Instant; @@ -66,7 +65,7 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let hash = hashstr(station); + let hash = hash::bytes(station); let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = t_stations.get_mut(&hash); diff --git a/src/main/rust/src/implementations/single_thread.rs b/src/main/rust/src/implementations/single_thread.rs index b38f95e..eb6875a 100644 --- a/src/main/rust/src/implementations/single_thread.rs +++ b/src/main/rust/src/implementations/single_thread.rs @@ -1,6 +1,5 @@ use crate::models::station_measurements::StationMeasurements; -use crate::utils::parse; -use crate::utils::parse::hashstr; +use crate::utils::{hash, parse}; use std::collections::HashMap; use std::fs::File; use std::io::{BufRead, BufReader}; @@ -24,7 +23,7 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); 
- let hash = hashstr(station); + let hash = hash::bytes(station); let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = stations.get_mut(&hash); diff --git a/src/main/rust/src/implementations/smol.rs b/src/main/rust/src/implementations/smol.rs index 27f26c5..22f9b45 100644 --- a/src/main/rust/src/implementations/smol.rs +++ b/src/main/rust/src/implementations/smol.rs @@ -2,8 +2,7 @@ use smol::fs::File; use smol::io::{AsyncBufReadExt, BufReader}; use crate::models::station_measurements::StationMeasurements; -use crate::utils::parse; -use crate::utils::parse::hashstr; +use crate::utils::{hash, parse}; use std::collections::HashMap; use std::time::Instant; @@ -29,7 +28,7 @@ pub fn run() { break; } let (station, temp) = line.rsplit_once(|&byte| byte == b';').unwrap(); - let hash = hashstr(station); + let hash = hash::bytes(station); let station = unsafe { std::str::from_utf8_unchecked(station) }; let temp = parse::temp(temp.split_last().unwrap().1); let measurements_option = stations.get_mut(&hash); diff --git a/src/main/rust/src/utils.rs b/src/main/rust/src/utils.rs index b7342c1..c0816f5 100644 --- a/src/main/rust/src/utils.rs +++ b/src/main/rust/src/utils.rs @@ -1,4 +1,5 @@ pub mod byte_pos; +pub mod hash; pub mod parse; pub mod write_structured_measurements; diff --git a/src/main/rust/src/utils/hash.rs b/src/main/rust/src/utils/hash.rs new file mode 100644 index 0000000..7a26a42 --- /dev/null +++ b/src/main/rust/src/utils/hash.rs @@ -0,0 +1,33 @@ +#[inline] +pub fn bytes(bytes: &[u8]) -> usize { + let mut hash: usize = 0; + let (chunks, remainder) = bytes.as_chunks::<8>(); + for &chunk in chunks { + hash = hash.wrapping_add(usize::from_be_bytes(chunk)); + } + let mut r = [0_u8; 8]; + r[0] = remainder.len() as u8; + let mut idx = 1; + for &byte in remainder { + r[idx] = byte; + idx += 1; + } + hash = hash.wrapping_add(usize::from_be_bytes(r)); + hash +} + +#[cfg(test)] +mod tests { + use 
crate::utils::hash; + + #[test] + fn test_hashstr() { + let hash_1 = hash::bytes(b"abcdefghijk"); + let hash_2 = hash::bytes(b"kjihgfedcba"); + let hash_3 = hash::bytes(b"abba"); + let hash_4 = hash::bytes(b"baab"); + + assert_ne!(hash_1, hash_2); + assert_ne!(hash_3, hash_4); + } +} \ No newline at end of file diff --git a/src/main/rust/src/utils/parse.rs b/src/main/rust/src/utils/parse.rs index 8eb9069..fdedd2f 100644 --- a/src/main/rust/src/utils/parse.rs +++ b/src/main/rust/src/utils/parse.rs @@ -69,27 +69,9 @@ pub fn temp_simd(bytes: &[u8]) -> isize { } } -#[inline] -pub fn hashstr(bytes: &[u8]) -> usize { - let mut hash: usize = 0; - let (chunks, remainder) = bytes.as_chunks::<8>(); - for &chunk in chunks { - hash = hash.wrapping_add(usize::from_be_bytes(chunk)); - } - let mut r = [0_u8; 8]; - r[0] = remainder.len() as u8; - let mut idx = 1; - for &byte in remainder { - r[idx] = byte; - idx += 1; - } - hash += usize::from_be_bytes(r); - hash -} - #[cfg(test)] mod tests { - use crate::utils::parse::{hashstr, temp_new}; + use crate::utils::parse::temp_new; #[test] fn test_temp_new_max() { @@ -120,15 +102,4 @@ mod tests { let temp_neg_10 = temp_new("-9.9".as_bytes()); assert_eq!(temp_neg_10, -99); } - - #[test] - fn test_hashstr() { - let hash_1 = hashstr(b"abcdefghijk"); - let hash_2 = hashstr(b"kjihgfedcba"); - let hash_3 = hashstr(b"abba"); - let hash_4 = hashstr(b"baab"); - - assert_ne!(hash_1, hash_2); - assert_ne!(hash_3, hash_4); - } }