fxhashmap faster after all...

This commit is contained in:
Fabian Schmidt 2024-08-28 08:52:40 +02:00
parent b8f589096f
commit ac5c45f8d5
6 changed files with 22 additions and 15 deletions

View File

@ -1,10 +1,10 @@
use crate::models::station_measurements::StationMeasurements; use crate::models::station_measurements::StationMeasurements;
use crate::utils::{hash, parse}; use crate::utils::{hash, parse};
use memmap2::MmapOptions; use memmap2::MmapOptions;
use std::collections::HashMap;
use std::sync::mpsc; use std::sync::mpsc;
use std::time::Instant; use std::time::Instant;
use std::{fs::File, thread}; use std::{fs::File, thread};
use rustc_hash::{FxHashMap as HashMap, FxBuildHasher};
const DEFAULT_HASHMAP_LENGTH: usize = 10000; const DEFAULT_HASHMAP_LENGTH: usize = 10000;
@ -14,8 +14,9 @@ pub fn run() {
let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
let file_length = mmap.len(); let file_length = mmap.len();
let mut stations: HashMap<usize, (String, StationMeasurements)> = let hasher = FxBuildHasher::default();
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
let (tx, rx) = mpsc::channel(); let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
let chunk_length = file_length / cores; let chunk_length = file_length / cores;
@ -40,8 +41,8 @@ pub fn run() {
let (start, end) = *bounds.get(i).unwrap(); let (start, end) = *bounds.get(i).unwrap();
let mmap_slice = &mmap[start..end]; let mmap_slice = &mmap[start..end];
s.spawn(move || { s.spawn(move || {
let mut t_stations: HashMap<usize, (String, StationMeasurements)> = let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher);
for line in mmap_slice.split(|&byte| byte == b'\n') { for line in mmap_slice.split(|&byte| byte == b'\n') {
if line.len() == 0 { if line.len() == 0 {
break; break;

View File

@ -12,7 +12,7 @@ pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt"; const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now(); let now = Instant::now();
thread::scope(|s| { thread::scope(|s| {
let mut stations: HashMap<usize, (String, StationMeasurements)> = let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let (tx, rx) = mpsc::channel(); let (tx, rx) = mpsc::channel();
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
@ -44,7 +44,7 @@ pub fn run() {
let file = File::open(FILE_PATH).expect("File measurements.txt not found"); let file = File::open(FILE_PATH).expect("File measurements.txt not found");
let mut reader = BufReader::new(&file); let mut reader = BufReader::new(&file);
reader.seek(SeekFrom::Start(currposition)).unwrap(); reader.seek(SeekFrom::Start(currposition)).unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> = let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108); let mut line = Vec::with_capacity(108);
loop { loop {

View File

@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() { pub fn run() {
const FILE_PATH: &str = "../../../measurements.txt"; const FILE_PATH: &str = "../../../measurements.txt";
let now = Instant::now(); let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> = let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let cores = thread::available_parallelism().unwrap().into(); let cores = thread::available_parallelism().unwrap().into();
let bounds = smol::block_on(async { let bounds = smol::block_on(async {
@ -53,7 +53,7 @@ pub fn run() {
.expect("File measurements.txt not found"); .expect("File measurements.txt not found");
let mut reader = BufReader::new(&mut file); let mut reader = BufReader::new(&mut file);
reader.seek(SeekFrom::Start(currposition)).await.unwrap(); reader.seek(SeekFrom::Start(currposition)).await.unwrap();
let mut t_stations: HashMap<usize, (String, StationMeasurements)> = let mut t_stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let mut line = Vec::with_capacity(108); let mut line = Vec::with_capacity(108);
loop { loop {

View File

@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() { pub fn run() {
let now = Instant::now(); let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> = let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); let file = File::open("../../../measurements.txt").expect("File measurements.txt not found");

View File

@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000;
pub fn run() { pub fn run() {
let now = Instant::now(); let now = Instant::now();
let mut stations: HashMap<usize, (String, StationMeasurements)> = let mut stations: HashMap<u64, (String, StationMeasurements)> =
HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH);
smol::block_on(async { smol::block_on(async {

View File

@ -1,9 +1,15 @@
#[inline] #[inline]
pub fn bytes(bytes: &[u8]) -> usize { pub fn bytes(bytes: &[u8]) -> u64 {
let mut hash: usize = 0; // hash from https://curiouscoding.nl/posts/1brc/ still wrong for measurements3.txt (and slower?)
//let mut key = [0u8; 8];
//let l = bytes.len().min(8);
//key[..l].copy_from_slice(&bytes[..l]);
//key[0] ^= bytes.len() as u8;
//u64::from_ne_bytes(key)
let mut hash: u64 = 0;
let (chunks, remainder) = bytes.as_chunks::<8>(); let (chunks, remainder) = bytes.as_chunks::<8>();
for &chunk in chunks { for &chunk in chunks {
hash = hash.wrapping_add(usize::from_be_bytes(chunk)); hash = hash.wrapping_add(u64::from_be_bytes(chunk));
} }
let mut r = [0_u8; 8]; let mut r = [0_u8; 8];
r[0] = remainder.len() as u8; r[0] = remainder.len() as u8;
@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize {
r[idx] = byte; r[idx] = byte;
idx += 1; idx += 1;
} }
hash += usize::from_be_bytes(r); hash += u64::from_be_bytes(r);
hash hash
} }