/// Computes a cheap 64-bit hash of `bytes` from its first and last eight bytes.
///
/// Inspired by <https://curiouscoding.nl/posts/1brc/>.
///
/// * Inputs of eight bytes or more: the first and the last eight bytes are each
///   read as a native-endian `u64` and combined with a wrapping add. Bytes in
///   the middle of longer inputs do not influence the result.
/// * Inputs shorter than eight bytes: the bytes are zero-padded to eight bytes
///   and read as a little-endian integer `key`; the result is
///   `(key << (64 - 8 * len)) + key` (wrapping). This is the value the
///   "overlapping head/tail window" trick yields on little-endian targets, but
///   computed without reading memory outside of the slice.
/// * The empty slice hashes to `0`.
///
/// The function never panics and never reads outside of `bytes`.
#[inline]
pub fn bytes(bytes: &[u8]) -> u64 {
    // Fast path: both eight-byte windows lie entirely inside the slice.
    // `first_chunk` / `last_chunk` return `None` for shorter inputs, so no
    // unchecked indexing is required.
    if let (Some(head), Some(tail)) = (bytes.first_chunk::<8>(), bytes.last_chunk::<8>()) {
        // Wrapping add: overflow is expected for a hash and must not panic in
        // debug builds.
        return u64::from_ne_bytes(*head).wrapping_add(u64::from_ne_bytes(*tail));
    }

    let len = bytes.len();
    if len == 0 {
        // A shift by 64 bits would overflow; the empty key simply hashes to 0.
        return 0;
    }

    // Short input (1..=7 bytes): zero-pad instead of reading neighbouring memory.
    let mut padded = [0_u8; 8];
    padded[..len].copy_from_slice(bytes);
    let key = u64::from_le_bytes(padded);

    // `len` is in 1..=7 here, so `shift` is in 8..=56 and always a valid shift.
    let shift = 64 - 8 * len;
    (key << shift).wrapping_add(key)
}