FxHashMap is faster after all...
This commit is contained in:
		| @@ -1,10 +1,10 @@ | ||||
| use crate::models::station_measurements::StationMeasurements; | ||||
| use crate::utils::{hash, parse}; | ||||
| use memmap2::MmapOptions; | ||||
| use std::collections::HashMap; | ||||
| use std::sync::mpsc; | ||||
| use std::time::Instant; | ||||
| use std::{fs::File, thread}; | ||||
| use rustc_hash::{FxHashMap as HashMap, FxBuildHasher}; | ||||
|  | ||||
| const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| @@ -14,8 +14,9 @@ pub fn run() { | ||||
|     let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||
|     let mmap = unsafe { MmapOptions::new().map(&file).unwrap() }; | ||||
|     let file_length = mmap.len(); | ||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|     let hasher = FxBuildHasher::default(); | ||||
|     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|         HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||
|     let (tx, rx) = mpsc::channel(); | ||||
|     let cores = thread::available_parallelism().unwrap().into(); | ||||
|     let chunk_length = file_length / cores; | ||||
| @@ -40,8 +41,8 @@ pub fn run() { | ||||
|             let (start, end) = *bounds.get(i).unwrap(); | ||||
|             let mmap_slice = &mmap[start..end]; | ||||
|             s.spawn(move || { | ||||
|                 let mut t_stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|                     HashMap::with_capacity_and_hasher(DEFAULT_HASHMAP_LENGTH, hasher); | ||||
|                 for line in mmap_slice.split(|&byte| byte == b'\n') { | ||||
|                     if line.len() == 0 { | ||||
|                         break; | ||||
|   | ||||
| @@ -12,7 +12,7 @@ pub fn run() { | ||||
|     const FILE_PATH: &str = "../../../measurements.txt"; | ||||
|     let now = Instant::now(); | ||||
|     thread::scope(|s| { | ||||
|         let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|         let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|             HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|         let (tx, rx) = mpsc::channel(); | ||||
|         let cores = thread::available_parallelism().unwrap().into(); | ||||
| @@ -44,7 +44,7 @@ pub fn run() { | ||||
|                 let file = File::open(FILE_PATH).expect("File measurements.txt not found"); | ||||
|                 let mut reader = BufReader::new(&file); | ||||
|                 reader.seek(SeekFrom::Start(currposition)).unwrap(); | ||||
|                 let mut t_stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|                 let mut line = Vec::with_capacity(108); | ||||
|                 loop { | ||||
|   | ||||
| @@ -13,7 +13,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
| pub fn run() { | ||||
|     const FILE_PATH: &str = "../../../measurements.txt"; | ||||
|     let now = Instant::now(); | ||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|     let cores = thread::available_parallelism().unwrap().into(); | ||||
|     let bounds = smol::block_on(async { | ||||
| @@ -53,7 +53,7 @@ pub fn run() { | ||||
|                     .expect("File measurements.txt not found"); | ||||
|                 let mut reader = BufReader::new(&mut file); | ||||
|                 reader.seek(SeekFrom::Start(currposition)).await.unwrap(); | ||||
|                 let mut t_stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|                 let mut t_stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|                     HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|                 let mut line = Vec::with_capacity(108); | ||||
|                 loop { | ||||
|   | ||||
| @@ -9,7 +9,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| pub fn run() { | ||||
|     let now = Instant::now(); | ||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|  | ||||
|     let file = File::open("../../../measurements.txt").expect("File measurements.txt not found"); | ||||
|   | ||||
| @@ -10,7 +10,7 @@ const DEFAULT_HASHMAP_LENGTH: usize = 10000; | ||||
|  | ||||
| pub fn run() { | ||||
|     let now = Instant::now(); | ||||
|     let mut stations: HashMap<usize, (String, StationMeasurements)> = | ||||
|     let mut stations: HashMap<u64, (String, StationMeasurements)> = | ||||
|         HashMap::with_capacity(DEFAULT_HASHMAP_LENGTH); | ||||
|  | ||||
|     smol::block_on(async { | ||||
|   | ||||
| @@ -1,9 +1,15 @@ | ||||
| #[inline] | ||||
| pub fn bytes(bytes: &[u8]) -> usize { | ||||
|     let mut hash: usize = 0; | ||||
| pub fn bytes(bytes: &[u8]) -> u64 { | ||||
|     // hash from https://curiouscoding.nl/posts/1brc/ still wrong for measurements3.txt (and slower?) | ||||
|     //let mut key = [0u8; 8]; | ||||
|     //let l = bytes.len().min(8); | ||||
|     //key[..l].copy_from_slice(&bytes[..l]); | ||||
|     //key[0] ^= bytes.len() as u8; | ||||
|     //u64::from_ne_bytes(key) | ||||
|     let mut hash: u64 = 0; | ||||
|     let (chunks, remainder) = bytes.as_chunks::<8>(); | ||||
|     for &chunk in chunks { | ||||
|         hash = hash.wrapping_add(usize::from_be_bytes(chunk)); | ||||
|         hash = hash.wrapping_add(u64::from_be_bytes(chunk)); | ||||
|     } | ||||
|     let mut r = [0_u8; 8]; | ||||
|     r[0] = remainder.len() as u8; | ||||
| @@ -12,7 +18,7 @@ pub fn bytes(bytes: &[u8]) -> usize { | ||||
|         r[idx] = byte; | ||||
|         idx += 1; | ||||
|     } | ||||
|     hash += usize::from_be_bytes(r); | ||||
|     hash += u64::from_be_bytes(r); | ||||
|     hash | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user