FxHashMap is faster after all...

This commit is contained in:
Fabian Schmidt 2024-08-28 08:52:40 +02:00
parent b8f589096f
commit ac5c45f8d5
6 changed files with 22 additions and 15 deletions

View File

@ -1,10 +1,10 @@
use crate::models::station_measurements::StationMeasurements;
use crate::utils::{hash, parse};
use memmap2::MmapOptions;
use std::collections::HashMap;
use std::sync::mpsc;
use std::time::Instant;
use std::{fs::File, thread};
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -14,8 +14,9 @@ pub fn run() {
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
let file_length = mmap.len();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let hasher = FxBuildHasher::default();
let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into();
let chunk_length = file_length / cores;
@ -40,8 +41,8 @@ pub fn run() {
let (start, end) = *bounds.get(i).unwrap();
let mmap_slice = &mmap[start..end];
s.spawn(move || {
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
for line in mmap_slice.split(|&byte| byte == b'\n') {
if line.len() == 0 {
break;

View File

@ -12,7 +12,7 @@ pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now();
thread::scope(|s| {
let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into();
@ -44,7 +44,7 @@ pub fn run() {
let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let mut reader = BufReader::new(&file);
reader.seek(SeekFrom::Start(currposition)).unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108);
loop {

View File

@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let cores = thread::available_parallelism().unwrap().into();
let bounds = smol::block_on(async {
@ -53,7 +53,7 @@ pub fn run() {
.expect("File measurements.txt not found");
let mut reader = BufReader::new(&mut file);
reader.seek(SeekFrom::Start(currposition)).await.unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> =
let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108);
loop {

View File

@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() {
let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");

View File

@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() {
let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> =
let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
smol::block_on(async {

View File

@ -1,9 +1,15 @@
#[inline]
pub fn bytes(bytes: &[u8]) -> usize {
let mut hash: usize = 0;
pub fn bytes(bytes: &[u8]) -> u64 {
// Hash from https://curiouscoding.nl/posts/1brc/ -- still wrong for measurements3.txt (and slower?)
//let mut key = [0u8; 8];
//let l = bytes.len().min(8);
//key[..l].copy_from_slice(&bytes[..l]);
//key[0] ^= bytes.len() as u8;
//u64::from_ne_bytes(key)
let mut hash: u64 = 0;
let (chunks, remainder) = bytes.as_chunks::<8>();
for &chunk in chunks {
hash = hash.wrapping_add(usize::from_be_bytes(chunk));
hash = hash.wrapping_add(u64::from_be_bytes(chunk));
}
let mut r = [0_u8; 8];
r[0] = remainder.len() as u8;
@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize {
r[idx] = byte;
idx += 1;
}
hash += usize::from_be_bytes(r);
hash += u64::from_be_bytes(r);
hash
}